From f62ad4f2c4214fdc05cc45c27a5c068553c7942c Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 13 May 2022 12:41:05 +0200 Subject: [PATCH] gh-89653: Use int type for Unicode kind (#92704) Use the same type that PyUnicode_FromKindAndData() kind parameter type (public C API): int. --- Include/cpython/unicodeobject.h | 4 +-- Modules/_csv.c | 8 ++--- Modules/_datetimemodule.c | 2 +- Modules/_decimal/_decimal.c | 4 +-- Modules/_elementtree.c | 2 +- Modules/_operator.c | 9 ++---- Modules/_pickle.c | 2 +- Modules/pyexpat.c | 2 +- Objects/bytesobject.c | 2 +- Objects/longobject.c | 4 +-- Objects/stringlib/localeutil.h | 2 +- Objects/unicodeobject.c | 56 ++++++++++++++++----------------- Python/formatter_unicode.c | 4 +-- 13 files changed, 49 insertions(+), 52 deletions(-) diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h index 030614e2633..16db2cb7bff 100644 --- a/Include/cpython/unicodeobject.h +++ b/Include/cpython/unicodeobject.h @@ -507,7 +507,7 @@ PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar ( typedef struct { PyObject *buffer; void *data; - enum PyUnicode_Kind kind; + int kind; Py_UCS4 maxchar; Py_ssize_t size; Py_ssize_t pos; @@ -566,7 +566,7 @@ _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer, macro instead. */ PyAPI_FUNC(int) _PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer, - enum PyUnicode_Kind kind); + int kind); /* Append a Unicode character. Return 0 on success, raise an exception and return -1 on error. */ diff --git a/Modules/_csv.c b/Modules/_csv.c index cbf4c5de519..d34d0a1296a 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -868,7 +868,7 @@ Reader_iternext(ReaderObj *self) PyObject *fields = NULL; Py_UCS4 c; Py_ssize_t pos, linelen; - unsigned int kind; + int kind; const void *data; PyObject *lineobj; @@ -1066,7 +1066,7 @@ join_reset(WriterObj *self) * record length. */ static Py_ssize_t -join_append_data(WriterObj *self, unsigned int field_kind, const void *field_data, +join_append_data(WriterObj *self, int field_kind, const void *field_data, Py_ssize_t field_len, int *quoted, int copy_phase) { @@ -1179,7 +1179,7 @@ join_check_rec_size(WriterObj *self, Py_ssize_t rec_len) static int join_append(WriterObj *self, PyObject *field, int quoted) { - unsigned int field_kind = -1; + int field_kind = -1; const void *field_data = NULL; Py_ssize_t field_len = 0; Py_ssize_t rec_len; @@ -1211,7 +1211,7 @@ static int join_append_lineterminator(WriterObj *self) { Py_ssize_t terminator_len, i; - unsigned int term_kind; + int term_kind; const void *term_data; terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator); diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index e0bb4ee602c..06dff8fdd9e 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -5284,7 +5284,7 @@ _sanitize_isoformat_str(PyObject *dtstr) // // The result of this, if not NULL, returns a new reference const void* const unicode_data = PyUnicode_DATA(dtstr); - const unsigned int kind = PyUnicode_KIND(dtstr); + const int kind = PyUnicode_KIND(dtstr); // Depending on the format of the string, the separator can only ever be // in positions 7, 8 or 10. We'll check each of these for a surrogate and diff --git a/Modules/_decimal/_decimal.c b/Modules/_decimal/_decimal.c index 5cbddac6232..548bd601b0e 100644 --- a/Modules/_decimal/_decimal.c +++ b/Modules/_decimal/_decimal.c @@ -1918,7 +1918,7 @@ dec_dealloc(PyObject *dec) /******************************************************************************/ Py_LOCAL_INLINE(int) -is_space(enum PyUnicode_Kind kind, const void *data, Py_ssize_t pos) +is_space(int kind, const void *data, Py_ssize_t pos) { Py_UCS4 ch = PyUnicode_READ(kind, data, pos); return Py_UNICODE_ISSPACE(ch); @@ -1935,7 +1935,7 @@ is_space(enum PyUnicode_Kind kind, const void *data, Py_ssize_t pos) static char * numeric_as_ascii(PyObject *u, int strip_ws, int ignore_underscores) { - enum PyUnicode_Kind kind; + int kind; const void *data; Py_UCS4 ch; char *res, *cp; diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index 89f877f6e12..453e57a94f2 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -1121,7 +1121,7 @@ checkpath(PyObject* tag) if (PyUnicode_Check(tag)) { const Py_ssize_t len = PyUnicode_GET_LENGTH(tag); const void *data = PyUnicode_DATA(tag); - unsigned int kind = PyUnicode_KIND(tag); + int kind = PyUnicode_KIND(tag); if (len >= 3 && PyUnicode_READ(kind, data, 0) == '{' && ( PyUnicode_READ(kind, data, 1) == '}' || ( PyUnicode_READ(kind, data, 1) == '*' && diff --git a/Modules/_operator.c b/Modules/_operator.c index 739ae5b229e..1af4a4feb77 100644 --- a/Modules/_operator.c +++ b/Modules/_operator.c @@ -1230,9 +1230,6 @@ attrgetter_new(PyTypeObject *type, PyObject *args, PyObject *kwds) /* prepare attr while checking args */ for (idx = 0; idx < nattrs; ++idx) { PyObject *item = PyTuple_GET_ITEM(args, idx); - Py_ssize_t item_len; - const void *data; - unsigned int kind; int dot_count; if (!PyUnicode_Check(item)) { @@ -1245,9 +1242,9 @@ attrgetter_new(PyTypeObject *type, PyObject *args, PyObject *kwds) Py_DECREF(attr); return NULL; } - item_len = PyUnicode_GET_LENGTH(item); - kind = PyUnicode_KIND(item); - data = PyUnicode_DATA(item); + Py_ssize_t item_len = PyUnicode_GET_LENGTH(item); + int kind = PyUnicode_KIND(item); + const void *data = PyUnicode_DATA(item); /* check whether the string is dotted */ dot_count = 0; diff --git a/Modules/_pickle.c b/Modules/_pickle.c index 851feadd27d..0adfa4103cc 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -2579,7 +2579,7 @@ raw_unicode_escape(PyObject *obj) char *p; Py_ssize_t i, size; const void *data; - unsigned int kind; + int kind; _PyBytesWriter writer; if (PyUnicode_READY(obj)) diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index ad8148adae9..b8535dfe711 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -1088,7 +1088,7 @@ PyUnknownEncodingHandler(void *encodingHandlerData, PyObject* u; int i; const void *data; - unsigned int kind; + int kind; if (PyErr_Occurred()) return XML_STATUS_ERROR; diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index b429f7687f7..1dfa1d57cf6 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -2396,7 +2396,7 @@ _PyBytes_FromHex(PyObject *string, int use_bytearray) if (!PyUnicode_IS_ASCII(string)) { const void *data = PyUnicode_DATA(string); - unsigned int kind = PyUnicode_KIND(string); + int kind = PyUnicode_KIND(string); Py_ssize_t i; /* search for the first non-ASCII character */ diff --git a/Objects/longobject.c b/Objects/longobject.c index 78360a58fac..7f60866d2eb 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -1714,7 +1714,7 @@ long_to_decimal_string_internal(PyObject *aa, digit *pout, *pin, rem, tenpow; int negative; int d; - enum PyUnicode_Kind kind; + int kind; a = (PyLongObject *)aa; if (a == NULL || !PyLong_Check(a)) { @@ -1904,7 +1904,7 @@ long_format_binary(PyObject *aa, int base, int alternate, PyObject *v = NULL; Py_ssize_t sz; Py_ssize_t size_a; - enum PyUnicode_Kind kind; + int kind; int negative; int bits; diff --git a/Objects/stringlib/localeutil.h b/Objects/stringlib/localeutil.h index bd16e0a1728..d77715ec0de 100644 --- a/Objects/stringlib/localeutil.h +++ b/Objects/stringlib/localeutil.h @@ -75,7 +75,7 @@ InsertThousandsGrouping_fill(_PyUnicodeWriter *writer, Py_ssize_t *buffer_pos, if (n_zeros) { *buffer_pos -= n_zeros; - enum PyUnicode_Kind kind = PyUnicode_KIND(writer->buffer); + int kind = PyUnicode_KIND(writer->buffer); void *data = PyUnicode_DATA(writer->buffer); unicode_fill(kind, data, '0', *buffer_pos, n_zeros); } diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index cc50fcd7679..ee327579352 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -247,7 +247,7 @@ static inline PyObject* unicode_new_empty(void) } while (0) static inline void -unicode_fill(enum PyUnicode_Kind kind, void *data, Py_UCS4 value, +unicode_fill(int kind, void *data, Py_UCS4 value, Py_ssize_t start, Py_ssize_t length) { assert(0 <= start); @@ -483,7 +483,7 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content) CHECK(PyUnicode_Check(op)); PyASCIIObject *ascii = _PyASCIIObject_CAST(op); - unsigned int kind = ascii->state.kind; + int kind = ascii->state.kind; if (ascii->state.ascii == 1 && ascii->state.compact == 1) { CHECK(kind == PyUnicode_1BYTE_KIND); @@ -612,7 +612,7 @@ backslashreplace(_PyBytesWriter *writer, char *str, { Py_ssize_t size, i; Py_UCS4 ch; - enum PyUnicode_Kind kind; + int kind; const void *data; kind = PyUnicode_KIND(unicode); @@ -678,7 +678,7 @@ xmlcharrefreplace(_PyBytesWriter *writer, char *str, { Py_ssize_t size, i; Py_UCS4 ch; - enum PyUnicode_Kind kind; + int kind; const void *data; kind = PyUnicode_KIND(unicode); @@ -1128,7 +1128,7 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar) PyObject *obj; PyCompactUnicodeObject *unicode; void *data; - enum PyUnicode_Kind kind; + int kind; int is_ascii; Py_ssize_t char_size; Py_ssize_t struct_size; @@ -1268,7 +1268,7 @@ _copy_characters(PyObject *to, Py_ssize_t to_start, PyObject *from, Py_ssize_t from_start, Py_ssize_t how_many, int check_maxchar) { - unsigned int from_kind, to_kind; + int from_kind, to_kind; const void *from_data; void *to_data; @@ -1663,7 +1663,7 @@ static void unicode_write_cstr(PyObject *unicode, Py_ssize_t index, const char *str, Py_ssize_t len) { - enum PyUnicode_Kind kind = PyUnicode_KIND(unicode); + int kind = PyUnicode_KIND(unicode); const void *data = PyUnicode_DATA(unicode); const char *end = str + len; @@ -1950,7 +1950,7 @@ _PyUnicode_FromASCII(const char *buffer, Py_ssize_t size) } static Py_UCS4 -kind_maxchar_limit(unsigned int kind) +kind_maxchar_limit(int kind) { switch (kind) { case PyUnicode_1BYTE_KIND: @@ -2064,7 +2064,7 @@ PyUnicode_FromKindAndData(int kind, const void *buffer, Py_ssize_t size) Py_UCS4 _PyUnicode_FindMaxChar(PyObject *unicode, Py_ssize_t start, Py_ssize_t end) { - enum PyUnicode_Kind kind; + int kind; const void *startptr, *endptr; assert(0 <= start); @@ -2105,7 +2105,7 @@ unicode_adjust_maxchar(PyObject **p_unicode) PyObject *unicode, *copy; Py_UCS4 max_char; Py_ssize_t len; - unsigned int kind; + int kind; assert(p_unicode != NULL); unicode = *p_unicode; @@ -2170,7 +2170,7 @@ _PyUnicode_Copy(PyObject *unicode) character. Return NULL on error. */ static void* -unicode_askind(unsigned int skind, void const *data, Py_ssize_t len, unsigned int kind) +unicode_askind(int skind, void const *data, Py_ssize_t len, int kind) { void *result; @@ -4984,7 +4984,7 @@ unicode_encode_utf8(PyObject *unicode, _Py_error_handler error_handler, return PyBytes_FromStringAndSize(PyUnicode_UTF8(unicode), PyUnicode_UTF8_LENGTH(unicode)); - enum PyUnicode_Kind kind = PyUnicode_KIND(unicode); + int kind = PyUnicode_KIND(unicode); const void *data = PyUnicode_DATA(unicode); Py_ssize_t size = PyUnicode_GET_LENGTH(unicode); @@ -5020,7 +5020,7 @@ unicode_fill_utf8(PyObject *unicode) /* the string cannot be ASCII, or PyUnicode_UTF8() would be set */ assert(!PyUnicode_IS_ASCII(unicode)); - enum PyUnicode_Kind kind = PyUnicode_KIND(unicode); + int kind = PyUnicode_KIND(unicode); const void *data = PyUnicode_DATA(unicode); Py_ssize_t size = PyUnicode_GET_LENGTH(unicode); @@ -5155,7 +5155,7 @@ PyUnicode_DecodeUTF32Stateful(const char *s, Py_UCS4 maxch = PyUnicode_MAX_CHAR_VALUE(writer.buffer); if (e - q >= 4) { - enum PyUnicode_Kind kind = writer.kind; + int kind = writer.kind; void *data = writer.data; const unsigned char *last = e - 4; Py_ssize_t pos = writer.pos; @@ -5240,7 +5240,7 @@ _PyUnicode_EncodeUTF32(PyObject *str, const char *errors, int byteorder) { - enum PyUnicode_Kind kind; + int kind; const void *data; Py_ssize_t len; PyObject *v; @@ -5557,7 +5557,7 @@ _PyUnicode_EncodeUTF16(PyObject *str, const char *errors, int byteorder) { - enum PyUnicode_Kind kind; + int kind; const void *data; Py_ssize_t len; PyObject *v; @@ -6022,7 +6022,7 @@ PyUnicode_AsUnicodeEscapeString(PyObject *unicode) Py_ssize_t i, len; PyObject *repr; char *p; - enum PyUnicode_Kind kind; + int kind; const void *data; Py_ssize_t expandsize; @@ -7365,7 +7365,7 @@ encode_code_page_errors(UINT code_page, PyObject **outbytes, } else { Py_ssize_t i; - enum PyUnicode_Kind kind; + int kind; const void *data; outsize = PyUnicode_GET_LENGTH(rep); @@ -7504,7 +7504,7 @@ charmap_decode_string(const char *s, Py_ssize_t startinpos, endinpos; PyObject *errorHandler = NULL, *exc = NULL; Py_ssize_t maplen; - enum PyUnicode_Kind mapkind; + int mapkind; const void *mapdata; Py_UCS4 x; unsigned char ch; @@ -7542,7 +7542,7 @@ charmap_decode_string(const char *s, while (s < e) { if (mapkind == PyUnicode_2BYTE_KIND && maplen >= 256) { - enum PyUnicode_Kind outkind = writer->kind; + int outkind = writer->kind; const Py_UCS2 *mapdata_ucs2 = (const Py_UCS2 *)mapdata; if (outkind == PyUnicode_1BYTE_KIND) { Py_UCS1 *outdata = (Py_UCS1 *)writer->data; @@ -8061,7 +8061,7 @@ charmap_encoding_error( PyObject *repunicode = NULL; /* initialize to prevent gcc warning */ Py_ssize_t size, repsize; Py_ssize_t newpos; - enum PyUnicode_Kind kind; + int kind; const void *data; Py_ssize_t index; /* startpos for collecting unencodable chars */ @@ -9422,7 +9422,7 @@ _PyUnicode_JoinArray(PyObject *separator, PyObject *const *items, Py_ssize_t seq int use_memcpy; unsigned char *res_data = NULL, *sep_data = NULL; PyObject *last_obj; - unsigned int kind = 0; + int kind = 0; /* If empty sequence, return u"". */ if (seqlen == 0) { @@ -9581,7 +9581,7 @@ void _PyUnicode_FastFill(PyObject *unicode, Py_ssize_t start, Py_ssize_t length, Py_UCS4 fill_char) { - const enum PyUnicode_Kind kind = PyUnicode_KIND(unicode); + const int kind = PyUnicode_KIND(unicode); void *data = PyUnicode_DATA(unicode); assert(unicode_modifiable(unicode)); assert(fill_char <= PyUnicode_MAX_CHAR_VALUE(unicode)); @@ -11049,7 +11049,7 @@ static PyObject * unicode_getitem(PyObject *self, Py_ssize_t index) { const void *data; - enum PyUnicode_Kind kind; + int kind; Py_UCS4 ch; if (!PyUnicode_Check(self)) { @@ -12985,7 +12985,7 @@ _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer, int _PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer, - enum PyUnicode_Kind kind) + int kind) { Py_UCS4 maxchar; @@ -13444,7 +13444,7 @@ struct unicode_formatter_t { Py_ssize_t arglen, argidx; PyObject *dict; - enum PyUnicode_Kind fmtkind; + int fmtkind; Py_ssize_t fmtcnt, fmtpos; const void *fmtdata; PyObject *fmtstr; @@ -14106,7 +14106,7 @@ unicode_format_arg_output(struct unicode_formatter_t *ctx, PyObject *str) { Py_ssize_t len; - enum PyUnicode_Kind kind; + int kind; const void *pbuf; Py_ssize_t pindex; Py_UCS4 signchar; @@ -14422,7 +14422,7 @@ unicode_subtype_new(PyTypeObject *type, PyObject *unicode) PyObject *self; Py_ssize_t length, char_size; int share_utf8; - unsigned int kind; + int kind; void *data; assert(PyType_IsSubtype(type, &PyUnicode_Type)); diff --git a/Python/formatter_unicode.c b/Python/formatter_unicode.c index 04d37c0be28..38e5f69bfb4 100644 --- a/Python/formatter_unicode.c +++ b/Python/formatter_unicode.c @@ -608,7 +608,7 @@ fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec, { /* Used to keep track of digits, decimal, and remainder. */ Py_ssize_t d_pos = d_start; - const unsigned int kind = writer->kind; + const int kind = writer->kind; const void *data = writer->data; Py_ssize_t r; @@ -1215,7 +1215,7 @@ format_complex_internal(PyObject *value, int flags = 0; int result = -1; Py_UCS4 maxchar = 127; - enum PyUnicode_Kind rkind; + int rkind; void *rdata; Py_UCS4 re_sign_char = '\0'; Py_UCS4 im_sign_char = '\0';