From 7d07e5891d2843f269fac00dc8847abfe3671765 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 1 Jun 2023 09:18:09 +0200 Subject: [PATCH] gh-105156: Cleanup usage of old Py_UNICODE type (#105158) * refcounts.dat: * Remove Py_UNICODE functions. * Replace Py_UNICODE argument type with Py_UCS4. * _PyUnicode_ToLowercase(), _PyUnicode_ToUppercase(), _PyUnicode_ToTitlecase() are no longer deprecated in comments. The deprecation marker is no longer needed since they now use Py_UCS4 type, rather than the deprecated Py_UNICODE type. * gdb: Remove unused char_width() method. --- Doc/data/refcounts.dat | 60 +++++++++++---------------------- Include/cpython/unicodeobject.h | 8 ++--- Objects/stringlib/README.txt | 2 +- Tools/gdb/libpython.py | 10 ++---- 4 files changed, 27 insertions(+), 53 deletions(-) diff --git a/Doc/data/refcounts.dat b/Doc/data/refcounts.dat index ee64ffdc916..ef37b834878 100644 --- a/Doc/data/refcounts.dat +++ b/Doc/data/refcounts.dat @@ -2374,76 +2374,56 @@ PyUnicode_KIND:PyObject*:o:0: PyUnicode_MAX_CHAR_VALUE:::: PyUnicode_MAX_CHAR_VALUE:PyObject*:o:0: -PyUnicode_AS_UNICODE:Py_UNICODE*::: -PyUnicode_AS_UNICODE:PyObject*:o:0: - -PyUnicode_AS_DATA:const char*::: -PyUnicode_AS_DATA:PyObject*:o:0: - Py_UNICODE_ISALNUM:int::: -Py_UNICODE_ISALNUM:Py_UNICODE:ch:: +Py_UNICODE_ISALNUM:Py_UCS4:ch:: Py_UNICODE_ISALPHA:int::: -Py_UNICODE_ISALPHA:Py_UNICODE:ch:: +Py_UNICODE_ISALPHA:Py_UCS4:ch:: Py_UNICODE_ISSPACE:int::: -Py_UNICODE_ISSPACE:Py_UNICODE:ch:: +Py_UNICODE_ISSPACE:Py_UCS4:ch:: Py_UNICODE_ISLOWER:int::: -Py_UNICODE_ISLOWER:Py_UNICODE:ch:: +Py_UNICODE_ISLOWER:Py_UCS4:ch:: Py_UNICODE_ISUPPER:int::: -Py_UNICODE_ISUPPER:Py_UNICODE:ch:: +Py_UNICODE_ISUPPER:Py_UCS4:ch:: Py_UNICODE_ISTITLE:int::: -Py_UNICODE_ISTITLE:Py_UNICODE:ch:: +Py_UNICODE_ISTITLE:Py_UCS4:ch:: Py_UNICODE_ISLINEBREAK:int::: -Py_UNICODE_ISLINEBREAK:Py_UNICODE:ch:: +Py_UNICODE_ISLINEBREAK:Py_UCS4:ch:: Py_UNICODE_ISDECIMAL:int::: -Py_UNICODE_ISDECIMAL:Py_UNICODE:ch:: +Py_UNICODE_ISDECIMAL:Py_UCS4:ch:: 
Py_UNICODE_ISDIGIT:int::: -Py_UNICODE_ISDIGIT:Py_UNICODE:ch:: +Py_UNICODE_ISDIGIT:Py_UCS4:ch:: Py_UNICODE_ISNUMERIC:int::: -Py_UNICODE_ISNUMERIC:Py_UNICODE:ch:: +Py_UNICODE_ISNUMERIC:Py_UCS4:ch:: Py_UNICODE_ISPRINTABLE:int::: -Py_UNICODE_ISPRINTABLE:Py_UNICODE:ch:: +Py_UNICODE_ISPRINTABLE:Py_UCS4:ch:: -Py_UNICODE_TOLOWER:Py_UNICODE::: -Py_UNICODE_TOLOWER:Py_UNICODE:ch:: +Py_UNICODE_TOLOWER:Py_UCS4::: +Py_UNICODE_TOLOWER:Py_UCS4:ch:: -Py_UNICODE_TOUPPER:Py_UNICODE::: -Py_UNICODE_TOUPPER:Py_UNICODE:ch:: +Py_UNICODE_TOUPPER:Py_UCS4::: +Py_UNICODE_TOUPPER:Py_UCS4:ch:: -Py_UNICODE_TOTITLE:Py_UNICODE::: -Py_UNICODE_TOTITLE:Py_UNICODE:ch:: +Py_UNICODE_TOTITLE:Py_UCS4::: +Py_UNICODE_TOTITLE:Py_UCS4:ch:: Py_UNICODE_TODECIMAL:int::: -Py_UNICODE_TODECIMAL:Py_UNICODE:ch:: +Py_UNICODE_TODECIMAL:Py_UCS4:ch:: Py_UNICODE_TODIGIT:int::: -Py_UNICODE_TODIGIT:Py_UNICODE:ch:: +Py_UNICODE_TODIGIT:Py_UCS4:ch:: Py_UNICODE_TONUMERIC:double::: -Py_UNICODE_TONUMERIC:Py_UNICODE:ch:: - -PyUnicode_FromUnicode:PyObject*::+1: -PyUnicode_FromUnicode:const Py_UNICODE*:u:: -PyUnicode_FromUnicode:Py_ssize_t:size:: - -PyUnicode_AsUnicode:Py_UNICODE*::: -PyUnicode_AsUnicode:PyObject*:unicode:0: - -PyUnicode_AsUnicodeAndSize:Py_UNICODE*::: -PyUnicode_AsUnicodeAndSize:PyObject*:unicode:0: -PyUnicode_AsUnicodeAndSize:Py_ssize_t*:size:: - -PyUnicode_GetSize:Py_ssize_t::: -PyUnicode_GetSize:PyObject*:unicode:0: +Py_UNICODE_TONUMERIC:Py_UCS4:ch:: PyUnicode_FromObject:PyObject*::+1: PyUnicode_FromObject:PyObject*:obj:0: diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h index 92e7afde427..dee8b27d3d9 100644 --- a/Include/cpython/unicodeobject.h +++ b/Include/cpython/unicodeobject.h @@ -379,8 +379,6 @@ static inline Py_UCS4 PyUnicode_MAX_CHAR_VALUE(PyObject *op) /* === Public API ========================================================= */ -/* --- Plain Py_UNICODE --------------------------------------------------- */ - /* With PEP 393, this is the recommended way to allocate a new 
unicode object. This function will allocate the object and its buffer in a single memory block. Objects created using this function are not resizable. */ @@ -827,15 +825,15 @@ PyAPI_FUNC(int) _PyUnicode_IsLinebreak( const Py_UCS4 ch /* Unicode character */ ); -/* Py_DEPRECATED(3.3) */ PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase( +PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase( Py_UCS4 ch /* Unicode character */ ); -/* Py_DEPRECATED(3.3) */ PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase( +PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase( Py_UCS4 ch /* Unicode character */ ); -Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase( +PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase( Py_UCS4 ch /* Unicode character */ ); diff --git a/Objects/stringlib/README.txt b/Objects/stringlib/README.txt index e1e329290ac..26f3d02b0ef 100644 --- a/Objects/stringlib/README.txt +++ b/Objects/stringlib/README.txt @@ -9,7 +9,7 @@ the following defines used by the different modules: STRINGLIB_CHAR - the type used to hold a character (char or Py_UNICODE) + the type used to hold a character (char, Py_UCS1, Py_UCS2 or Py_UCS4) STRINGLIB_GET_EMPTY() diff --git a/Tools/gdb/libpython.py b/Tools/gdb/libpython.py index e38bd59e20a..79b8c7527c2 100755 --- a/Tools/gdb/libpython.py +++ b/Tools/gdb/libpython.py @@ -1390,10 +1390,6 @@ def _unichr_is_printable(char): class PyUnicodeObjectPtr(PyObjectPtr): _typename = 'PyUnicodeObject' - def char_width(self): - _type_Py_UNICODE = gdb.lookup_type('Py_UNICODE') - return _type_Py_UNICODE.sizeof - def proxyval(self, visited): compact = self.field('_base') ascii = compact['_base'] @@ -1414,13 +1410,13 @@ class PyUnicodeObjectPtr(PyObjectPtr): elif repr_kind == 4: field_str = field_str.cast(_type_unsigned_int_ptr()) - # Gather a list of ints from the Py_UNICODE array; these are either + # Gather a list of ints from the code point array; these are either # UCS-1, UCS-2 or UCS-4 code points: - Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)] + 
code_points = [int(field_str[i]) for i in safe_range(field_length)] # Convert the int code points to unicode characters, and generate a # local unicode instance. - result = u''.join(map(chr, Py_UNICODEs)) + result = u''.join(map(chr, code_points)) return result def write_repr(self, out, visited):