gh-111089: Add PyUnicode_AsUTF8() to the limited C API (#111121)

Add PyUnicode_AsUTF8() function to the limited C API.

multiprocessing posixshmem now uses PyUnicode_AsUTF8() instead of
PyUnicode_AsUTF8AndSize(): the extension is built with the limited C
API. The function now raises an exception if the filename contains an
embedded null character instead of silently truncating the filename.
This commit is contained in:
Victor Stinner 2023-10-20 19:29:27 +02:00 committed by GitHub
parent 264f4af506
commit d8f32be5b6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 23 additions and 16 deletions

View File

@ -726,6 +726,7 @@ function,PyUnicode_AsUCS4,3.7,,
function,PyUnicode_AsUCS4Copy,3.7,, function,PyUnicode_AsUCS4Copy,3.7,,
function,PyUnicode_AsUTF16String,3.2,, function,PyUnicode_AsUTF16String,3.2,,
function,PyUnicode_AsUTF32String,3.2,, function,PyUnicode_AsUTF32String,3.2,,
function,PyUnicode_AsUTF8,3.13,,
function,PyUnicode_AsUTF8AndSize,3.10,, function,PyUnicode_AsUTF8AndSize,3.10,,
function,PyUnicode_AsUTF8String,3.2,, function,PyUnicode_AsUTF8String,3.2,,
function,PyUnicode_AsUnicodeEscapeString,3.2,, function,PyUnicode_AsUnicodeEscapeString,3.2,,

View File

@ -1069,6 +1069,9 @@ New Features
limited C API. limited C API.
(Contributed by Victor Stinner in :gh:`85283`.) (Contributed by Victor Stinner in :gh:`85283`.)
* Add :c:func:`PyUnicode_AsUTF8` function to the limited C API.
(Contributed by Victor Stinner in :gh:`111089`.)
Porting to Python 3.13 Porting to Python 3.13
---------------------- ----------------------

View File

@ -440,19 +440,6 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData(
const void *buffer, const void *buffer,
Py_ssize_t size); Py_ssize_t size);
/* --- Manage the default encoding ---------------------------------------- */
// Returns a pointer to the default encoding (UTF-8) of the
// Unicode object unicode.
//
// Raise an exception if the string contains embedded null characters.
// Use PyUnicode_AsUTF8AndSize() to accept embedded null characters.
//
// This function caches the UTF-8 encoded string in the Unicode object
// and subsequent calls will return the same string. The memory is released
// when the Unicode object is deallocated.
PyAPI_FUNC(const char *) PyUnicode_AsUTF8(PyObject *unicode);
/* === Characters Type APIs =============================================== */ /* === Characters Type APIs =============================================== */

View File

@ -443,7 +443,17 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String(
PyObject *unicode /* Unicode object */ PyObject *unicode /* Unicode object */
); );
// Returns a pointer to the default encoding (UTF-8) of the // Returns a pointer to the UTF-8 encoding of the Unicode object unicode.
//
// Raise an exception if the string contains embedded null characters.
// Use PyUnicode_AsUTF8AndSize() to accept embedded null characters.
//
// This function caches the UTF-8 encoded string in the Unicode object
// and subsequent calls will return the same string. The memory is released
// when the Unicode object is deallocated.
PyAPI_FUNC(const char *) PyUnicode_AsUTF8(PyObject *unicode);
// Returns a pointer to the UTF-8 encoding of the
// Unicode object unicode and the size of the encoded representation // Unicode object unicode and the size of the encoded representation
// in bytes stored in `*size` (if size is not NULL). // in bytes stored in `*size` (if size is not NULL).
// //

View File

@ -745,6 +745,7 @@ SYMBOL_NAMES = (
"PyUnicode_AsUCS4Copy", "PyUnicode_AsUCS4Copy",
"PyUnicode_AsUTF16String", "PyUnicode_AsUTF16String",
"PyUnicode_AsUTF32String", "PyUnicode_AsUTF32String",
"PyUnicode_AsUTF8",
"PyUnicode_AsUTF8AndSize", "PyUnicode_AsUTF8AndSize",
"PyUnicode_AsUTF8String", "PyUnicode_AsUTF8String",
"PyUnicode_AsUnicodeEscapeString", "PyUnicode_AsUnicodeEscapeString",

View File

@ -0,0 +1,2 @@
Add :c:func:`PyUnicode_AsUTF8` function to the limited C API. Patch by
Victor Stinner.

View File

@ -2478,3 +2478,5 @@
added = '3.13' added = '3.13'
[function.PySys_AuditTuple] [function.PySys_AuditTuple]
added = '3.13' added = '3.13'
[function.PyUnicode_AsUTF8]
added = '3.13'

View File

@ -44,7 +44,7 @@ _posixshmem_shm_open_impl(PyObject *module, PyObject *path, int flags,
{ {
int fd; int fd;
int async_err = 0; int async_err = 0;
const char *name = PyUnicode_AsUTF8AndSize(path, NULL); const char *name = PyUnicode_AsUTF8(path);
if (name == NULL) { if (name == NULL) {
return -1; return -1;
} }
@ -83,7 +83,7 @@ _posixshmem_shm_unlink_impl(PyObject *module, PyObject *path)
{ {
int rv; int rv;
int async_err = 0; int async_err = 0;
const char *name = PyUnicode_AsUTF8AndSize(path, NULL); const char *name = PyUnicode_AsUTF8(path);
if (name == NULL) { if (name == NULL) {
return NULL; return NULL;
} }

1
PC/python3dll.c generated
View File

@ -661,6 +661,7 @@ EXPORT_FUNC(PyUnicode_AsUCS4Copy)
EXPORT_FUNC(PyUnicode_AsUnicodeEscapeString) EXPORT_FUNC(PyUnicode_AsUnicodeEscapeString)
EXPORT_FUNC(PyUnicode_AsUTF16String) EXPORT_FUNC(PyUnicode_AsUTF16String)
EXPORT_FUNC(PyUnicode_AsUTF32String) EXPORT_FUNC(PyUnicode_AsUTF32String)
EXPORT_FUNC(PyUnicode_AsUTF8)
EXPORT_FUNC(PyUnicode_AsUTF8AndSize) EXPORT_FUNC(PyUnicode_AsUTF8AndSize)
EXPORT_FUNC(PyUnicode_AsUTF8String) EXPORT_FUNC(PyUnicode_AsUTF8String)
EXPORT_FUNC(PyUnicode_AsWideChar) EXPORT_FUNC(PyUnicode_AsWideChar)