mirror of https://github.com/python/cpython
gh-89653: PEP 670: Convert unicodeobject.h macros to functions (#91773)
Convert unicodeobject.h macros to static inline functions: * PyUnicode_MAX_CHAR_VALUE() * PyUnicode_READ() * PyUnicode_READY() * PyUnicode_READ_CHAR() * PyUnicode_WRITE() Move PyUnicode_READY() after _PyUnicode_Ready(), since it uses _PyUnicode_Ready(). Static inline functions are wrapped by macros which casts arguments with _PyObject_CAST() and casts 'kind' arguments to "unsigned int" to prevent introducing new compiler warnings when passing "const PyObject*".
This commit is contained in:
parent
f8dc6186d1
commit
128d624117
|
@ -45,7 +45,7 @@
|
|||
#define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch)
|
||||
|
||||
#define Py_UNICODE_ISALNUM(ch) \
|
||||
(Py_UNICODE_ISALPHA(ch) || \
|
||||
(Py_UNICODE_ISALPHA(ch) || \
|
||||
Py_UNICODE_ISDECIMAL(ch) || \
|
||||
Py_UNICODE_ISDIGIT(ch) || \
|
||||
Py_UNICODE_ISNUMERIC(ch))
|
||||
|
@ -379,80 +379,84 @@ static inline Py_ssize_t PyUnicode_GET_LENGTH(PyObject *op) {
|
|||
}
|
||||
#define PyUnicode_GET_LENGTH(op) PyUnicode_GET_LENGTH(_PyObject_CAST(op))
|
||||
|
||||
/* In the access macros below, "kind" may be evaluated more than once.
|
||||
All other macro parameters are evaluated exactly once, so it is safe
|
||||
to put side effects into them (such as increasing the index). */
|
||||
|
||||
/* Write into the canonical representation, this macro does not do any sanity
|
||||
/* Write into the canonical representation, this function does not do any sanity
|
||||
checks and is intended for usage in loops. The caller should cache the
|
||||
kind and data pointers obtained from other macro calls.
|
||||
kind and data pointers obtained from other function calls.
|
||||
index is the index in the string (starts at 0) and value is the new
|
||||
code point value which should be written to that location. */
|
||||
static inline void PyUnicode_WRITE(unsigned int kind, void *data,
|
||||
Py_ssize_t index, Py_UCS4 value)
|
||||
{
|
||||
if (kind == PyUnicode_1BYTE_KIND) {
|
||||
((Py_UCS1 *)data)[index] = (Py_UCS1)value;
|
||||
}
|
||||
else if (kind == PyUnicode_2BYTE_KIND) {
|
||||
((Py_UCS2 *)data)[index] = (Py_UCS2)value;
|
||||
}
|
||||
else {
|
||||
assert(kind == PyUnicode_4BYTE_KIND);
|
||||
((Py_UCS4 *)data)[index] = value;
|
||||
}
|
||||
}
|
||||
#define PyUnicode_WRITE(kind, data, index, value) \
|
||||
do { \
|
||||
switch ((kind)) { \
|
||||
case PyUnicode_1BYTE_KIND: { \
|
||||
((Py_UCS1 *)(data))[(index)] = (Py_UCS1)(value); \
|
||||
break; \
|
||||
} \
|
||||
case PyUnicode_2BYTE_KIND: { \
|
||||
((Py_UCS2 *)(data))[(index)] = (Py_UCS2)(value); \
|
||||
break; \
|
||||
} \
|
||||
default: { \
|
||||
assert((kind) == PyUnicode_4BYTE_KIND); \
|
||||
((Py_UCS4 *)(data))[(index)] = (Py_UCS4)(value); \
|
||||
} \
|
||||
} \
|
||||
} while (0)
|
||||
PyUnicode_WRITE((unsigned int)(kind), (void*)(data), (index), (Py_UCS4)(value))
|
||||
|
||||
/* Read a code point from the string's canonical representation. No checks
|
||||
or ready calls are performed. */
|
||||
static inline Py_UCS4 PyUnicode_READ(unsigned int kind,
|
||||
const void *data, Py_ssize_t index)
|
||||
{
|
||||
if (kind == PyUnicode_1BYTE_KIND) {
|
||||
return ((const Py_UCS1 *)data)[index];
|
||||
}
|
||||
if (kind == PyUnicode_2BYTE_KIND) {
|
||||
return ((const Py_UCS2 *)data)[index];
|
||||
}
|
||||
return ((const Py_UCS4 *)data)[index];
|
||||
}
|
||||
#define PyUnicode_READ(kind, data, index) \
|
||||
((Py_UCS4) \
|
||||
((kind) == PyUnicode_1BYTE_KIND ? \
|
||||
((const Py_UCS1 *)(data))[(index)] : \
|
||||
((kind) == PyUnicode_2BYTE_KIND ? \
|
||||
((const Py_UCS2 *)(data))[(index)] : \
|
||||
((const Py_UCS4 *)(data))[(index)] \
|
||||
) \
|
||||
))
|
||||
PyUnicode_READ((unsigned int)(kind), (const void*)(data), (index))
|
||||
|
||||
/* PyUnicode_READ_CHAR() is less efficient than PyUnicode_READ() because it
|
||||
calls PyUnicode_KIND() and might call it twice. For single reads, use
|
||||
PyUnicode_READ_CHAR, for multiple consecutive reads callers should
|
||||
cache kind and use PyUnicode_READ instead. */
|
||||
static inline Py_UCS4 PyUnicode_READ_CHAR(PyObject *unicode, Py_ssize_t index)
|
||||
{
|
||||
assert(PyUnicode_IS_READY(unicode));
|
||||
unsigned int kind = PyUnicode_KIND(unicode);
|
||||
if (kind == PyUnicode_1BYTE_KIND) {
|
||||
return PyUnicode_1BYTE_DATA(unicode)[index];
|
||||
}
|
||||
if (kind == PyUnicode_2BYTE_KIND) {
|
||||
return PyUnicode_2BYTE_DATA(unicode)[index];
|
||||
}
|
||||
return PyUnicode_4BYTE_DATA(unicode)[index];
|
||||
}
|
||||
#define PyUnicode_READ_CHAR(unicode, index) \
|
||||
(assert(PyUnicode_IS_READY(unicode)), \
|
||||
(Py_UCS4) \
|
||||
(PyUnicode_KIND((unicode)) == PyUnicode_1BYTE_KIND ? \
|
||||
((const Py_UCS1 *)(PyUnicode_DATA((unicode))))[(index)] : \
|
||||
(PyUnicode_KIND((unicode)) == PyUnicode_2BYTE_KIND ? \
|
||||
((const Py_UCS2 *)(PyUnicode_DATA((unicode))))[(index)] : \
|
||||
((const Py_UCS4 *)(PyUnicode_DATA((unicode))))[(index)] \
|
||||
) \
|
||||
))
|
||||
|
||||
/* PyUnicode_READY() does less work than _PyUnicode_Ready() in the best
|
||||
case. If the canonical representation is not yet set, it will still call
|
||||
_PyUnicode_Ready().
|
||||
Returns 0 on success and -1 on errors. */
|
||||
#define PyUnicode_READY(op) \
|
||||
((PyUnicode_IS_READY(op) ? \
|
||||
0 : _PyUnicode_Ready(_PyObject_CAST(op))))
|
||||
PyUnicode_READ_CHAR(_PyObject_CAST(unicode), (index))
|
||||
|
||||
/* Return a maximum character value which is suitable for creating another
|
||||
string based on op. This is always an approximation but more efficient
|
||||
than iterating over the string. */
|
||||
static inline Py_UCS4 PyUnicode_MAX_CHAR_VALUE(PyObject *op)
|
||||
{
|
||||
assert(PyUnicode_IS_READY(op));
|
||||
if (PyUnicode_IS_ASCII(op)) {
|
||||
return 0x7fU;
|
||||
}
|
||||
|
||||
unsigned int kind = PyUnicode_KIND(op);
|
||||
if (kind == PyUnicode_1BYTE_KIND) {
|
||||
return 0xffU;
|
||||
}
|
||||
if (kind == PyUnicode_2BYTE_KIND) {
|
||||
return 0xffffU;
|
||||
}
|
||||
return 0x10ffffU;
|
||||
}
|
||||
#define PyUnicode_MAX_CHAR_VALUE(op) \
|
||||
(assert(PyUnicode_IS_READY(op)), \
|
||||
(PyUnicode_IS_ASCII(op) ? \
|
||||
(0x7f) : \
|
||||
(PyUnicode_KIND(op) == PyUnicode_1BYTE_KIND ? \
|
||||
(0xffU) : \
|
||||
(PyUnicode_KIND(op) == PyUnicode_2BYTE_KIND ? \
|
||||
(0xffffU) : \
|
||||
(0x10ffffU)))))
|
||||
PyUnicode_MAX_CHAR_VALUE(_PyObject_CAST(op))
|
||||
|
||||
Py_DEPRECATED(3.3)
|
||||
static inline Py_ssize_t PyUnicode_WSTR_LENGTH(PyObject *op) {
|
||||
|
@ -479,12 +483,25 @@ PyAPI_FUNC(PyObject*) PyUnicode_New(
|
|||
objects which were created using the old API to the new flexible format
|
||||
introduced with PEP 393.
|
||||
|
||||
Don't call this function directly, use the public PyUnicode_READY() macro
|
||||
Don't call this function directly, use the public PyUnicode_READY() function
|
||||
instead. */
|
||||
PyAPI_FUNC(int) _PyUnicode_Ready(
|
||||
PyObject *unicode /* Unicode object */
|
||||
);
|
||||
|
||||
/* PyUnicode_READY() does less work than _PyUnicode_Ready() in the best
|
||||
case. If the canonical representation is not yet set, it will still call
|
||||
_PyUnicode_Ready().
|
||||
Returns 0 on success and -1 on errors. */
|
||||
static inline int PyUnicode_READY(PyObject *op)
|
||||
{
|
||||
if (PyUnicode_IS_READY(op)) {
|
||||
return 0;
|
||||
}
|
||||
return _PyUnicode_Ready(op);
|
||||
}
|
||||
#define PyUnicode_READY(op) PyUnicode_READY(_PyObject_CAST(op))
|
||||
|
||||
/* Get a copy of a Unicode string. */
|
||||
PyAPI_FUNC(PyObject*) _PyUnicode_Copy(
|
||||
PyObject *unicode
|
||||
|
|
Loading…
Reference in New Issue