From 37bb2895561d3e63a631f10875567b4e33b30c07 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 19 Jun 2020 11:45:31 +0200 Subject: [PATCH] bpo-40943: PY_SSIZE_T_CLEAN required for '#' formats (GH-20784) The PY_SSIZE_T_CLEAN macro must now be defined to use PyArg_ParseTuple() and Py_BuildValue() "#" formats: "es#", "et#", "s#", "u#", "y#", "z#", "U#" and "Z#". See the PEP 353. Update _testcapi.test_buildvalue_issue38913(). --- Doc/c-api/arg.rst | 36 ++++---- Doc/whatsnew/3.10.rst | 7 ++ .../2020-06-10-18-37-26.bpo-40943.i4q7rK.rst | 5 ++ Modules/_testcapimodule.c | 19 ++-- Python/getargs.c | 87 ++++++------------- Python/modsupport.c | 33 +++---- 6 files changed, 86 insertions(+), 101 deletions(-) create mode 100644 Misc/NEWS.d/next/C API/2020-06-10-18-37-26.bpo-40943.i4q7rK.rst diff --git a/Doc/c-api/arg.rst b/Doc/c-api/arg.rst index b7baad589a7..26e872c5a34 100644 --- a/Doc/c-api/arg.rst +++ b/Doc/c-api/arg.rst @@ -55,13 +55,11 @@ which disallows mutable objects such as :class:`bytearray`. .. note:: - For all ``#`` variants of formats (``s#``, ``y#``, etc.), the type of - the length argument (int or :c:type:`Py_ssize_t`) is controlled by - defining the macro :c:macro:`PY_SSIZE_T_CLEAN` before including - :file:`Python.h`. If the macro was defined, length is a - :c:type:`Py_ssize_t` rather than an :c:type:`int`. This behavior will change - in a future Python version to only support :c:type:`Py_ssize_t` and - drop :c:type:`int` support. It is best to always define :c:macro:`PY_SSIZE_T_CLEAN`. + For all ``#`` variants of formats (``s#``, ``y#``, etc.), the macro + :c:macro:`PY_SSIZE_T_CLEAN` must be defined before including + :file:`Python.h`. On Python 3.9 and older, the type of the length argument + is :c:type:`Py_ssize_t` if the :c:macro:`PY_SSIZE_T_CLEAN` macro is defined, + or int otherwise. ``s`` (:class:`str`) [const char \*] @@ -90,7 +88,7 @@ which disallows mutable objects such as :class:`bytearray`. 
In this case the resulting C string may contain embedded NUL bytes. Unicode objects are converted to C strings using ``'utf-8'`` encoding. -``s#`` (:class:`str`, read-only :term:`bytes-like object`) [const char \*, int or :c:type:`Py_ssize_t`] +``s#`` (:class:`str`, read-only :term:`bytes-like object`) [const char \*, :c:type:`Py_ssize_t`] Like ``s*``, except that it doesn't accept mutable objects. The result is stored into two C variables, the first one a pointer to a C string, the second one its length. @@ -105,7 +103,7 @@ which disallows mutable objects such as :class:`bytearray`. Like ``s*``, but the Python object may also be ``None``, in which case the ``buf`` member of the :c:type:`Py_buffer` structure is set to ``NULL``. -``z#`` (:class:`str`, read-only :term:`bytes-like object` or ``None``) [const char \*, int or :c:type:`Py_ssize_t`] +``z#`` (:class:`str`, read-only :term:`bytes-like object` or ``None``) [const char \*, :c:type:`Py_ssize_t`] Like ``s#``, but the Python object may also be ``None``, in which case the C pointer is set to ``NULL``. @@ -124,7 +122,7 @@ which disallows mutable objects such as :class:`bytearray`. bytes-like objects. **This is the recommended way to accept binary data.** -``y#`` (read-only :term:`bytes-like object`) [const char \*, int or :c:type:`Py_ssize_t`] +``y#`` (read-only :term:`bytes-like object`) [const char \*, :c:type:`Py_ssize_t`] This variant on ``s#`` doesn't accept Unicode objects, only bytes-like objects. @@ -155,7 +153,7 @@ which disallows mutable objects such as :class:`bytearray`. Part of the old-style :c:type:`Py_UNICODE` API; please migrate to using :c:func:`PyUnicode_AsWideCharString`. -``u#`` (:class:`str`) [const Py_UNICODE \*, int or :c:type:`Py_ssize_t`] +``u#`` (:class:`str`) [const Py_UNICODE \*, :c:type:`Py_ssize_t`] This variant on ``u`` stores into two C variables, the first one a pointer to a Unicode data buffer, the second one its length. This variant allows null code points. 
@@ -172,7 +170,7 @@ which disallows mutable objects such as :class:`bytearray`. Part of the old-style :c:type:`Py_UNICODE` API; please migrate to using :c:func:`PyUnicode_AsWideCharString`. -``Z#`` (:class:`str` or ``None``) [const Py_UNICODE \*, int or :c:type:`Py_ssize_t`] +``Z#`` (:class:`str` or ``None``) [const Py_UNICODE \*, :c:type:`Py_ssize_t`] Like ``u#``, but the Python object may also be ``None``, in which case the :c:type:`Py_UNICODE` pointer is set to ``NULL``. @@ -213,7 +211,7 @@ which disallows mutable objects such as :class:`bytearray`. recoding them. Instead, the implementation assumes that the byte string object uses the encoding passed in as parameter. -``es#`` (:class:`str`) [const char \*encoding, char \*\*buffer, int or :c:type:`Py_ssize_t` \*buffer_length] +``es#`` (:class:`str`) [const char \*encoding, char \*\*buffer, :c:type:`Py_ssize_t` \*buffer_length] This variant on ``s#`` is used for encoding Unicode into a character buffer. Unlike the ``es`` format, this variant allows input data which contains NUL characters. @@ -244,7 +242,7 @@ which disallows mutable objects such as :class:`bytearray`. In both cases, *\*buffer_length* is set to the length of the encoded data without the trailing NUL byte. -``et#`` (:class:`str`, :class:`bytes` or :class:`bytearray`) [const char \*encoding, char \*\*buffer, int or :c:type:`Py_ssize_t` \*buffer_length] +``et#`` (:class:`str`, :class:`bytes` or :class:`bytearray`) [const char \*encoding, char \*\*buffer, :c:type:`Py_ssize_t` \*buffer_length] Same as ``es#`` except that byte string objects are passed through without recoding them. Instead, the implementation assumes that the byte string object uses the encoding passed in as parameter. @@ -549,7 +547,7 @@ Building values Convert a null-terminated C string to a Python :class:`str` object using ``'utf-8'`` encoding. If the C string pointer is ``NULL``, ``None`` is used. 
- ``s#`` (:class:`str` or ``None``) [const char \*, int or :c:type:`Py_ssize_t`] + ``s#`` (:class:`str` or ``None``) [const char \*, :c:type:`Py_ssize_t`] Convert a C string and its length to a Python :class:`str` object using ``'utf-8'`` encoding. If the C string pointer is ``NULL``, the length is ignored and ``None`` is returned. @@ -558,14 +556,14 @@ Building values This converts a C string to a Python :class:`bytes` object. If the C string pointer is ``NULL``, ``None`` is returned. - ``y#`` (:class:`bytes`) [const char \*, int or :c:type:`Py_ssize_t`] + ``y#`` (:class:`bytes`) [const char \*, :c:type:`Py_ssize_t`] This converts a C string and its lengths to a Python object. If the C string pointer is ``NULL``, ``None`` is returned. ``z`` (:class:`str` or ``None``) [const char \*] Same as ``s``. - ``z#`` (:class:`str` or ``None``) [const char \*, int or :c:type:`Py_ssize_t`] + ``z#`` (:class:`str` or ``None``) [const char \*, :c:type:`Py_ssize_t`] Same as ``s#``. ``u`` (:class:`str`) [const wchar_t \*] @@ -573,7 +571,7 @@ Building values data to a Python Unicode object. If the Unicode buffer pointer is ``NULL``, ``None`` is returned. - ``u#`` (:class:`str`) [const wchar_t \*, int or :c:type:`Py_ssize_t`] + ``u#`` (:class:`str`) [const wchar_t \*, :c:type:`Py_ssize_t`] Convert a Unicode (UTF-16 or UCS-4) data buffer and its length to a Python Unicode object. If the Unicode buffer pointer is ``NULL``, the length is ignored and ``None`` is returned. @@ -581,7 +579,7 @@ Building values ``U`` (:class:`str` or ``None``) [const char \*] Same as ``s``. - ``U#`` (:class:`str` or ``None``) [const char \*, int or :c:type:`Py_ssize_t`] + ``U#`` (:class:`str` or ``None``) [const char \*, :c:type:`Py_ssize_t`] Same as ``s#``. 
``i`` (:class:`int`) [int] diff --git a/Doc/whatsnew/3.10.rst b/Doc/whatsnew/3.10.rst index 566827bf90f..9c1dca1152a 100644 --- a/Doc/whatsnew/3.10.rst +++ b/Doc/whatsnew/3.10.rst @@ -155,6 +155,13 @@ New Features Porting to Python 3.10 ---------------------- +* The ``PY_SSIZE_T_CLEAN`` macro must now be defined to use + :c:func:`PyArg_ParseTuple` and :c:func:`Py_BuildValue` formats which use + ``#``: ``es#``, ``et#``, ``s#``, ``u#``, ``y#``, ``z#``, ``U#`` and ``Z#``. + See :ref:`Parsing arguments and building values + <arg-parsing>` and the :pep:`353`. + (Contributed by Victor Stinner in :issue:`40943`.) + * Since :c:func:`Py_TYPE()` is changed to the inline static function, ``Py_TYPE(obj) = new_type`` must be replaced with ``Py_SET_TYPE(obj, new_type)``: see :c:func:`Py_SET_TYPE()` (available since Python 3.9). For backward diff --git a/Misc/NEWS.d/next/C API/2020-06-10-18-37-26.bpo-40943.i4q7rK.rst b/Misc/NEWS.d/next/C API/2020-06-10-18-37-26.bpo-40943.i4q7rK.rst new file mode 100644 index 00000000000..360ddae34cb --- /dev/null +++ b/Misc/NEWS.d/next/C API/2020-06-10-18-37-26.bpo-40943.i4q7rK.rst @@ -0,0 +1,5 @@ +The ``PY_SSIZE_T_CLEAN`` macro must now be defined to use +:c:func:`PyArg_ParseTuple` and :c:func:`Py_BuildValue` formats which use ``#``: +``es#``, ``et#``, ``s#``, ``u#``, ``y#``, ``z#``, ``U#`` and ``Z#``. +See :ref:`Parsing arguments and building values <arg-parsing>` and the +:pep:`353`. 
diff --git a/Modules/_testcapimodule.c b/Modules/_testcapimodule.c index 5302641a9a3..808483ebd7b 100644 --- a/Modules/_testcapimodule.c +++ b/Modules/_testcapimodule.c @@ -6868,29 +6868,36 @@ test_buildvalue_issue38913(PyObject *self, PyObject *Py_UNUSED(ignored)) PyObject *res; const char str[] = "string"; const Py_UNICODE unicode[] = L"unicode"; - PyErr_SetNone(PyExc_ZeroDivisionError); + assert(!PyErr_Occurred()); res = Py_BuildValue("(s#O)", str, 1, Py_None); assert(res == NULL); - if (!PyErr_ExceptionMatches(PyExc_ZeroDivisionError)) { + if (!PyErr_ExceptionMatches(PyExc_SystemError)) { return NULL; } + PyErr_Clear(); + res = Py_BuildValue("(z#O)", str, 1, Py_None); assert(res == NULL); - if (!PyErr_ExceptionMatches(PyExc_ZeroDivisionError)) { + if (!PyErr_ExceptionMatches(PyExc_SystemError)) { return NULL; } + PyErr_Clear(); + res = Py_BuildValue("(y#O)", str, 1, Py_None); assert(res == NULL); - if (!PyErr_ExceptionMatches(PyExc_ZeroDivisionError)) { + if (!PyErr_ExceptionMatches(PyExc_SystemError)) { return NULL; } + PyErr_Clear(); + res = Py_BuildValue("(u#O)", unicode, 1, Py_None); assert(res == NULL); - if (!PyErr_ExceptionMatches(PyExc_ZeroDivisionError)) { + if (!PyErr_ExceptionMatches(PyExc_SystemError)) { return NULL; } - PyErr_Clear(); + + Py_RETURN_NONE; } diff --git a/Python/getargs.c b/Python/getargs.c index cf0cc078368..aaf687a46b7 100644 --- a/Python/getargs.c +++ b/Python/getargs.c @@ -656,27 +656,12 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, char *msgbuf, size_t bufsize, freelist_t *freelist) { /* For # codes */ -#define FETCH_SIZE int *q=NULL;Py_ssize_t *q2=NULL;\ - if (flags & FLAG_SIZE_T) q2=va_arg(*p_va, Py_ssize_t*); \ - else { \ - if (PyErr_WarnEx(PyExc_DeprecationWarning, \ - "PY_SSIZE_T_CLEAN will be required for '#' formats", 1)) { \ - return NULL; \ - } \ - q=va_arg(*p_va, int*); \ +#define REQUIRE_PY_SSIZE_T_CLEAN \ + if (!(flags & FLAG_SIZE_T)) { \ + PyErr_SetString(PyExc_SystemError, \ + 
"PY_SSIZE_T_CLEAN macro must be defined for '#' formats"); \ + return NULL; \ } -#define STORE_SIZE(s) \ - if (flags & FLAG_SIZE_T) \ - *q2=s; \ - else { \ - if (INT_MAX < s) { \ - PyErr_SetString(PyExc_OverflowError, \ - "size does not fit in an int"); \ - return converterr("", arg, msgbuf, bufsize); \ - } \ - *q = (int)s; \ - } -#define BUFFER_LEN ((flags & FLAG_SIZE_T) ? *q2:*q) #define RETURN_ERR_OCCURRED return msgbuf const char *format = *p_format; @@ -931,8 +916,9 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, if (count < 0) return converterr(buf, arg, msgbuf, bufsize); if (*format == '#') { - FETCH_SIZE; - STORE_SIZE(count); + REQUIRE_PY_SSIZE_T_CLEAN; + Py_ssize_t *psize = va_arg(*p_va, Py_ssize_t*); + *psize = count; format++; } else { if (strlen(*p) != (size_t)count) { @@ -974,11 +960,12 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, } else if (*format == '#') { /* a string or read-only bytes-like object */ /* "s#" or "z#" */ const void **p = (const void **)va_arg(*p_va, const char **); - FETCH_SIZE; + REQUIRE_PY_SSIZE_T_CLEAN; + Py_ssize_t *psize = va_arg(*p_va, Py_ssize_t*); if (c == 'z' && arg == Py_None) { *p = NULL; - STORE_SIZE(0); + *psize = 0; } else if (PyUnicode_Check(arg)) { Py_ssize_t len; @@ -987,7 +974,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, return converterr(CONV_UNICODE, arg, msgbuf, bufsize); *p = sarg; - STORE_SIZE(len); + *psize = len; } else { /* read-only bytes-like object */ /* XXX Really? 
*/ @@ -995,7 +982,7 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, Py_ssize_t count = convertbuffer(arg, p, &buf); if (count < 0) return converterr(buf, arg, msgbuf, bufsize); - STORE_SIZE(count); + *psize = count; } format++; } else { @@ -1034,18 +1021,19 @@ _Py_COMP_DIAG_IGNORE_DEPR_DECLS if (*format == '#') { /* "u#" or "Z#" */ - FETCH_SIZE; + REQUIRE_PY_SSIZE_T_CLEAN; + Py_ssize_t *psize = va_arg(*p_va, Py_ssize_t*); if (c == 'Z' && arg == Py_None) { *p = NULL; - STORE_SIZE(0); + *psize = 0; } else if (PyUnicode_Check(arg)) { Py_ssize_t len; *p = PyUnicode_AsUnicodeAndSize(arg, &len); if (*p == NULL) RETURN_ERR_OCCURRED; - STORE_SIZE(len); + *psize = len; } else return converterr(c == 'Z' ? "str or None" : "str", @@ -1160,22 +1148,11 @@ _Py_COMP_DIAG_POP trailing 0-byte */ - int *q = NULL; Py_ssize_t *q2 = NULL; - if (flags & FLAG_SIZE_T) { - q2 = va_arg(*p_va, Py_ssize_t*); - } - else { - if (PyErr_WarnEx(PyExc_DeprecationWarning, - "PY_SSIZE_T_CLEAN will be required for '#' formats", 1)) - { - Py_DECREF(s); - return NULL; - } - q = va_arg(*p_va, int*); - } + REQUIRE_PY_SSIZE_T_CLEAN; + Py_ssize_t *psize = va_arg(*p_va, Py_ssize_t*); format++; - if (q == NULL && q2 == NULL) { + if (psize == NULL) { Py_DECREF(s); return converterr( "(buffer_len is NULL)", @@ -1195,30 +1172,20 @@ _Py_COMP_DIAG_POP arg, msgbuf, bufsize); } } else { - if (size + 1 > BUFFER_LEN) { + if (size + 1 > *psize) { Py_DECREF(s); PyErr_Format(PyExc_ValueError, "encoded string too long " "(%zd, maximum length %zd)", - (Py_ssize_t)size, (Py_ssize_t)(BUFFER_LEN-1)); + (Py_ssize_t)size, (Py_ssize_t)(*psize - 1)); RETURN_ERR_OCCURRED; } } memcpy(*buffer, ptr, size+1); - if (flags & FLAG_SIZE_T) { - *q2 = size; - } - else { - if (INT_MAX < size) { - Py_DECREF(s); - PyErr_SetString(PyExc_OverflowError, - "size does not fit in an int"); - return converterr("", arg, msgbuf, bufsize); - } - *q = (int)size; - } - } else { + *psize = size; + } + else { /* Using a 
0-terminated buffer: - the encoded string has to be 0-terminated @@ -1356,9 +1323,7 @@ _Py_COMP_DIAG_POP *p_format = format; return NULL; -#undef FETCH_SIZE -#undef STORE_SIZE -#undef BUFFER_LEN +#undef REQUIRE_PY_SSIZE_T_CLEAN #undef RETURN_ERR_OCCURRED } diff --git a/Python/modsupport.c b/Python/modsupport.c index 845bdcb2b6f..2637039d4a1 100644 --- a/Python/modsupport.c +++ b/Python/modsupport.c @@ -283,6 +283,13 @@ do_mktuple(const char **p_format, va_list *p_va, char endchar, Py_ssize_t n, int static PyObject * do_mkvalue(const char **p_format, va_list *p_va, int flags) { +#define ERROR_NEED_PY_SSIZE_T_CLEAN \ + { \ + PyErr_SetString(PyExc_SystemError, \ + "PY_SSIZE_T_CLEAN macro must be defined for '#' formats"); \ + return NULL; \ + } + for (;;) { switch (*(*p_format)++) { case '(': @@ -341,14 +348,12 @@ do_mkvalue(const char **p_format, va_list *p_va, int flags) Py_ssize_t n; if (**p_format == '#') { ++*p_format; - if (flags & FLAG_SIZE_T) + if (flags & FLAG_SIZE_T) { n = va_arg(*p_va, Py_ssize_t); + } else { n = va_arg(*p_va, int); - if (PyErr_WarnEx(PyExc_DeprecationWarning, - "PY_SSIZE_T_CLEAN will be required for '#' formats", 1)) { - return NULL; - } + ERROR_NEED_PY_SSIZE_T_CLEAN; } } else @@ -394,14 +399,12 @@ do_mkvalue(const char **p_format, va_list *p_va, int flags) Py_ssize_t n; if (**p_format == '#') { ++*p_format; - if (flags & FLAG_SIZE_T) + if (flags & FLAG_SIZE_T) { n = va_arg(*p_va, Py_ssize_t); + } else { n = va_arg(*p_va, int); - if (PyErr_WarnEx(PyExc_DeprecationWarning, - "PY_SSIZE_T_CLEAN will be required for '#' formats", 1)) { - return NULL; - } + ERROR_NEED_PY_SSIZE_T_CLEAN; } } else @@ -432,14 +435,12 @@ do_mkvalue(const char **p_format, va_list *p_va, int flags) Py_ssize_t n; if (**p_format == '#') { ++*p_format; - if (flags & FLAG_SIZE_T) + if (flags & FLAG_SIZE_T) { n = va_arg(*p_va, Py_ssize_t); + } else { n = va_arg(*p_va, int); - if (PyErr_WarnEx(PyExc_DeprecationWarning, - "PY_SSIZE_T_CLEAN will be required for '#' formats", 
1)) { - return NULL; - } + ERROR_NEED_PY_SSIZE_T_CLEAN; } } else @@ -507,6 +508,8 @@ do_mkvalue(const char **p_format, va_list *p_va, int flags) } } + +#undef ERROR_NEED_PY_SSIZE_T_CLEAN }