gh-106320: Remove private _PyUnicode codecs C API functions (#106385)

Remove private _PyUnicode codecs C API functions: move them to the internal C API (pycore_unicodeobject.h). No longer export most of these functions.
2023-07-04 09:29:52 +02:00 · 2023-07-04 09:29:52 +02:00 · d8c5d76da2
parent 3406f8cce5
commit d8c5d76da2
3 changed files with 101 additions and 106 deletions
--- a/Include/cpython/unicodeobject.h
+++ b/Include/cpython/unicodeobject.h
@ -461,112 +461,6 @@ PyAPI_FUNC(const char *) PyUnicode_AsUTF8(PyObject *unicode);

 #define _PyUnicode_AsString PyUnicode_AsUTF8

-/* --- UTF-7 Codecs ------------------------------------------------------- */
-
-PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF7(
-    PyObject *unicode,          /* Unicode object */
-    int base64SetO,             /* Encode RFC2152 Set O characters in base64 */
-    int base64WhiteSpace,       /* Encode whitespace (sp, ht, nl, cr) in base64 */
-    const char *errors          /* error handling */
-    );
-
-/* --- UTF-8 Codecs ------------------------------------------------------- */
-
-PyAPI_FUNC(PyObject*) _PyUnicode_AsUTF8String(
-    PyObject *unicode,
-    const char *errors);
-
-/* --- UTF-32 Codecs ------------------------------------------------------ */
-
-PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF32(
-    PyObject *object,           /* Unicode object */
-    const char *errors,         /* error handling */
-    int byteorder               /* byteorder to use 0=BOM+native;-1=LE,1=BE */
-    );
-
-/* --- UTF-16 Codecs ------------------------------------------------------ */
-
-/* Returns a Python string object holding the UTF-16 encoded value of
-   the Unicode data.
-
-   If byteorder is not 0, output is written according to the following
-   byte order:
-
-   byteorder == -1: little endian
-   byteorder == 0:  native byte order (writes a BOM mark)
-   byteorder == 1:  big endian
-
-   If byteorder is 0, the output string will always start with the
-   Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
-   prepended.
-*/
-PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF16(
-    PyObject* unicode,          /* Unicode object */
-    const char *errors,         /* error handling */
-    int byteorder               /* byteorder to use 0=BOM+native;-1=LE,1=BE */
-    );
-
-/* --- Unicode-Escape Codecs ---------------------------------------------- */
-
-/* Variant of PyUnicode_DecodeUnicodeEscape that supports partial decoding. */
-PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeStateful(
-        const char *string,     /* Unicode-Escape encoded string */
-        Py_ssize_t length,      /* size of string */
-        const char *errors,     /* error handling */
-        Py_ssize_t *consumed    /* bytes consumed */
-);
-/* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape
-   chars. */
-PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal(
-        const char *string,     /* Unicode-Escape encoded string */
-        Py_ssize_t length,      /* size of string */
-        const char *errors,     /* error handling */
-        Py_ssize_t *consumed,   /* bytes consumed */
-        const char **first_invalid_escape  /* on return, points to first
-                                              invalid escaped char in
-                                              string. */
-);
-
-/* --- Raw-Unicode-Escape Codecs ---------------------------------------------- */
-
-/* Variant of PyUnicode_DecodeRawUnicodeEscape that supports partial decoding. */
-PyAPI_FUNC(PyObject*) _PyUnicode_DecodeRawUnicodeEscapeStateful(
-        const char *string,     /* Unicode-Escape encoded string */
-        Py_ssize_t length,      /* size of string */
-        const char *errors,     /* error handling */
-        Py_ssize_t *consumed    /* bytes consumed */
-);
-
-/* --- Latin-1 Codecs ----------------------------------------------------- */
-
-PyAPI_FUNC(PyObject*) _PyUnicode_AsLatin1String(
-    PyObject* unicode,
-    const char* errors);
-
-/* --- ASCII Codecs ------------------------------------------------------- */
-
-PyAPI_FUNC(PyObject*) _PyUnicode_AsASCIIString(
-    PyObject* unicode,
-    const char* errors);
-
-/* --- Character Map Codecs ----------------------------------------------- */
-
-/* Translate an Unicode object by applying a character mapping table to
-   it and return the resulting Unicode object.
-
-   The mapping table must map Unicode ordinal integers to Unicode strings,
-   Unicode ordinal integers or None (causing deletion of the character).
-
-   Mapping tables may be dictionaries or sequences. Unmapped character
-   ordinals (ones which cause a LookupError) are left untouched and
-   are copied as-is.
-*/
-PyAPI_FUNC(PyObject*) _PyUnicode_EncodeCharmap(
-    PyObject *unicode,          /* Unicode object */
-    PyObject *mapping,          /* encoding mapping */
-    const char *errors          /* error handling */
-    );
-
 /* --- Decimal Encoder ---------------------------------------------------- */

 /* Coverts a Unicode object holding a decimal value to an ASCII string
--- a/Include/internal/pycore_unicodeobject.h
+++ b/Include/internal/pycore_unicodeobject.h
@ -177,6 +177,106 @@ PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter(
    Py_ssize_t start,
    Py_ssize_t end);

+/* --- UTF-7 Codecs ------------------------------------------------------- */
+
+extern PyObject* _PyUnicode_EncodeUTF7(
+    PyObject *unicode,          /* Unicode object */
+    int base64SetO,             /* Encode RFC2152 Set O characters in base64 */
+    int base64WhiteSpace,       /* Encode whitespace (sp, ht, nl, cr) in base64 */
+    const char *errors);        /* error handling */
+
+/* --- UTF-8 Codecs ------------------------------------------------------- */
+
+PyAPI_FUNC(PyObject*) _PyUnicode_AsUTF8String(
+    PyObject *unicode,
+    const char *errors);
+
+/* --- UTF-32 Codecs ------------------------------------------------------ */
+
+PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF32(
+    PyObject *object,           /* Unicode object */
+    const char *errors,         /* error handling */
+    int byteorder);             /* byteorder to use 0=BOM+native;-1=LE,1=BE */
+
+/* --- UTF-16 Codecs ------------------------------------------------------ */
+
+/* Returns a Python string object holding the UTF-16 encoded value of
+   the Unicode data.
+
+   If byteorder is not 0, output is written according to the following
+   byte order:
+
+   byteorder == -1: little endian
+   byteorder == 0:  native byte order (writes a BOM mark)
+   byteorder == 1:  big endian
+
+   If byteorder is 0, the output string will always start with the
+   Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
+   prepended.
+*/
+PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF16(
+    PyObject* unicode,          /* Unicode object */
+    const char *errors,         /* error handling */
+    int byteorder);             /* byteorder to use 0=BOM+native;-1=LE,1=BE */
+
+/* --- Unicode-Escape Codecs ---------------------------------------------- */
+
+/* Variant of PyUnicode_DecodeUnicodeEscape that supports partial decoding. */
+extern PyObject* _PyUnicode_DecodeUnicodeEscapeStateful(
+    const char *string,     /* Unicode-Escape encoded string */
+    Py_ssize_t length,      /* size of string */
+    const char *errors,     /* error handling */
+    Py_ssize_t *consumed);  /* bytes consumed */
+
+/* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape
+   chars. */
+PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal(
+    const char *string,     /* Unicode-Escape encoded string */
+    Py_ssize_t length,      /* size of string */
+    const char *errors,     /* error handling */
+    Py_ssize_t *consumed,   /* bytes consumed */
+    const char **first_invalid_escape); /* on return, points to first
+                                           invalid escaped char in
+                                           string. */
+
+/* --- Raw-Unicode-Escape Codecs ---------------------------------------------- */
+
+/* Variant of PyUnicode_DecodeRawUnicodeEscape that supports partial decoding. */
+extern PyObject* _PyUnicode_DecodeRawUnicodeEscapeStateful(
+    const char *string,     /* Unicode-Escape encoded string */
+    Py_ssize_t length,      /* size of string */
+    const char *errors,     /* error handling */
+    Py_ssize_t *consumed);  /* bytes consumed */
+
+/* --- Latin-1 Codecs ----------------------------------------------------- */
+
+extern PyObject* _PyUnicode_AsLatin1String(
+    PyObject* unicode,
+    const char* errors);
+
+/* --- ASCII Codecs ------------------------------------------------------- */
+
+extern PyObject* _PyUnicode_AsASCIIString(
+    PyObject* unicode,
+    const char* errors);
+
+/* --- Character Map Codecs ----------------------------------------------- */
+
+/* Translate an Unicode object by applying a character mapping table to
+   it and return the resulting Unicode object.
+
+   The mapping table must map Unicode ordinal integers to Unicode strings,
+   Unicode ordinal integers or None (causing deletion of the character).
+
+   Mapping tables may be dictionaries or sequences. Unmapped character
+   ordinals (ones which cause a LookupError) are left untouched and
+   are copied as-is.
+*/
+extern PyObject* _PyUnicode_EncodeCharmap(
+    PyObject *unicode,          /* Unicode object */
+    PyObject *mapping,          /* encoding mapping */
+    const char *errors);        /* error handling */
+
 /* --- Methods & Slots ---------------------------------------------------- */

 extern PyObject* _PyUnicode_JoinArray(
--- a/Parser/string_parser.c
+++ b/Parser/string_parser.c
@ -1,6 +1,7 @@
 #include <stdbool.h>

 #include <Python.h>
+#include "pycore_unicodeobject.h" // _PyUnicode_DecodeUnicodeEscapeInternal()

 #include "tokenizer.h"
 #include "pegen.h"