From d8f32be5b6a736dc2fc9dca3f1bf176c82fc9b44 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 20 Oct 2023 19:29:27 +0200 Subject: [PATCH] gh-111089: Add PyUnicode_AsUTF8() to the limited C API (#111121) Add PyUnicode_AsUTF8() function to the limited C API. multiprocessing posixshmem now uses PyUnicode_AsUTF8() instead of PyUnicode_AsUTF8AndSize(): the extension is built with the limited C API. The function now raises an exception if the filename contains an embedded null character instead of silently truncating the filename. --- Doc/data/stable_abi.dat | 1 + Doc/whatsnew/3.13.rst | 3 +++ Include/cpython/unicodeobject.h | 13 ------------- Include/unicodeobject.h | 12 +++++++++++- Lib/test/test_stable_abi_ctypes.py | 1 + .../2023-10-20-18-07-24.gh-issue-111089.RxkyrQ.rst | 2 ++ Misc/stable_abi.toml | 2 ++ Modules/_multiprocessing/posixshmem.c | 4 ++-- PC/python3dll.c | 1 + 9 files changed, 23 insertions(+), 16 deletions(-) create mode 100644 Misc/NEWS.d/next/C API/2023-10-20-18-07-24.gh-issue-111089.RxkyrQ.rst diff --git a/Doc/data/stable_abi.dat b/Doc/data/stable_abi.dat index 811b1bd84d2..52d6d967d66 100644 --- a/Doc/data/stable_abi.dat +++ b/Doc/data/stable_abi.dat @@ -726,6 +726,7 @@ function,PyUnicode_AsUCS4,3.7,, function,PyUnicode_AsUCS4Copy,3.7,, function,PyUnicode_AsUTF16String,3.2,, function,PyUnicode_AsUTF32String,3.2,, +function,PyUnicode_AsUTF8,3.13,, function,PyUnicode_AsUTF8AndSize,3.10,, function,PyUnicode_AsUTF8String,3.2,, function,PyUnicode_AsUnicodeEscapeString,3.2,, diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 34e4d67224b..5da5f938061 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -1069,6 +1069,9 @@ New Features limited C API. (Contributed by Victor Stinner in :gh:`85283`.) +* Add :c:func:`PyUnicode_AsUTF8` function to the limited C API. + (Contributed by Victor Stinner in :gh:`111089`.) 
+ Porting to Python 3.13 ---------------------- diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h index d67553c6657..d200fa0622c 100644 --- a/Include/cpython/unicodeobject.h +++ b/Include/cpython/unicodeobject.h @@ -440,19 +440,6 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData( const void *buffer, Py_ssize_t size); -/* --- Manage the default encoding ---------------------------------------- */ - -// Returns a pointer to the default encoding (UTF-8) of the -// Unicode object unicode. -// -// Raise an exception if the string contains embedded null characters. -// Use PyUnicode_AsUTF8AndSize() to accept embedded null characters. -// -// This function caches the UTF-8 encoded string in the Unicode object -// and subsequent calls will return the same string. The memory is released -// when the Unicode object is deallocated. -PyAPI_FUNC(const char *) PyUnicode_AsUTF8(PyObject *unicode); - /* === Characters Type APIs =============================================== */ diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index 1e5753dae6c..ee7b769ce5a 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -443,7 +443,17 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String( PyObject *unicode /* Unicode object */ ); -// Returns a pointer to the default encoding (UTF-8) of the +// Returns a pointer to the UTF-8 encoding of the Unicode object unicode. +// +// Raise an exception if the string contains embedded null characters. +// Use PyUnicode_AsUTF8AndSize() to accept embedded null characters. +// +// This function caches the UTF-8 encoded string in the Unicode object +// and subsequent calls will return the same string. The memory is released +// when the Unicode object is deallocated. +PyAPI_FUNC(const char *) PyUnicode_AsUTF8(PyObject *unicode); + +// Returns a pointer to the UTF-8 encoding of the // Unicode object unicode and the size of the encoded representation // in bytes stored in `*size` (if size is not NULL). 
// diff --git a/Lib/test/test_stable_abi_ctypes.py b/Lib/test/test_stable_abi_ctypes.py index 6d5353c2276..88bc0fd4025 100644 --- a/Lib/test/test_stable_abi_ctypes.py +++ b/Lib/test/test_stable_abi_ctypes.py @@ -745,6 +745,7 @@ SYMBOL_NAMES = ( "PyUnicode_AsUCS4Copy", "PyUnicode_AsUTF16String", "PyUnicode_AsUTF32String", + "PyUnicode_AsUTF8", "PyUnicode_AsUTF8AndSize", "PyUnicode_AsUTF8String", "PyUnicode_AsUnicodeEscapeString", diff --git a/Misc/NEWS.d/next/C API/2023-10-20-18-07-24.gh-issue-111089.RxkyrQ.rst b/Misc/NEWS.d/next/C API/2023-10-20-18-07-24.gh-issue-111089.RxkyrQ.rst new file mode 100644 index 00000000000..fe32e06fe4f --- /dev/null +++ b/Misc/NEWS.d/next/C API/2023-10-20-18-07-24.gh-issue-111089.RxkyrQ.rst @@ -0,0 +1,2 @@ +Add :c:func:`PyUnicode_AsUTF8` function to the limited C API. Patch by +Victor Stinner. diff --git a/Misc/stable_abi.toml b/Misc/stable_abi.toml index 75c260c8f1b..0601de20fe0 100644 --- a/Misc/stable_abi.toml +++ b/Misc/stable_abi.toml @@ -2478,3 +2478,5 @@ added = '3.13' [function.PySys_AuditTuple] added = '3.13' +[function.PyUnicode_AsUTF8] + added = '3.13' diff --git a/Modules/_multiprocessing/posixshmem.c b/Modules/_multiprocessing/posixshmem.c index 317381a47ee..c4d1138534d 100644 --- a/Modules/_multiprocessing/posixshmem.c +++ b/Modules/_multiprocessing/posixshmem.c @@ -44,7 +44,7 @@ _posixshmem_shm_open_impl(PyObject *module, PyObject *path, int flags, { int fd; int async_err = 0; - const char *name = PyUnicode_AsUTF8AndSize(path, NULL); + const char *name = PyUnicode_AsUTF8(path); if (name == NULL) { return -1; } @@ -83,7 +83,7 @@ _posixshmem_shm_unlink_impl(PyObject *module, PyObject *path) { int rv; int async_err = 0; - const char *name = PyUnicode_AsUTF8AndSize(path, NULL); + const char *name = PyUnicode_AsUTF8(path); if (name == NULL) { return NULL; } diff --git a/PC/python3dll.c b/PC/python3dll.c index d12889f44d6..7f5d97ae4dc 100755 --- a/PC/python3dll.c +++ b/PC/python3dll.c @@ -661,6 +661,7 @@ 
EXPORT_FUNC(PyUnicode_AsUCS4Copy) EXPORT_FUNC(PyUnicode_AsUnicodeEscapeString) EXPORT_FUNC(PyUnicode_AsUTF16String) EXPORT_FUNC(PyUnicode_AsUTF32String) +EXPORT_FUNC(PyUnicode_AsUTF8) EXPORT_FUNC(PyUnicode_AsUTF8AndSize) EXPORT_FUNC(PyUnicode_AsUTF8String) EXPORT_FUNC(PyUnicode_AsWideChar)