bpo-40521: Optimize PyUnicode_New(0, maxchar) (GH-21099)

Functions in unicodeobject.c, such as PyUnicode_New(), no longer check
whether the empty Unicode singleton has been initialized; they now assume
that it always is. The Unicode API must not be used before
_PyUnicode_Init() or after _PyUnicode_Fini().
This commit is contained in:
Victor Stinner 2020-06-24 00:34:07 +02:00 committed by GitHub
parent f363d0a6e9
commit 90ed8a6d71
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 25 additions and 55 deletions

View File

@ -231,28 +231,19 @@ get_unicode_state(void)
// Return a borrowed reference to the empty string singleton.
// Return NULL if the singleton was not created yet.
static inline PyObject* unicode_get_empty(void)
{
struct _Py_unicode_state *state = get_unicode_state();
// unicode_get_empty() must not be called before _PyUnicode_Init()
// or after _PyUnicode_Fini()
assert(state->empty != NULL);
return state->empty;
}
// Return a new (strong) reference to the empty string singleton.
//
// The singleton is required to exist whenever this is called
// (unicode_get_empty() asserts it), so there is no lazy-creation path and,
// under that precondition, the result is not NULL.
// A fallback that called PyUnicode_New(0, 0) here would recurse, because
// PyUnicode_New() itself returns unicode_new_empty() when size is 0.
static inline PyObject* unicode_new_empty(void)
{
    PyObject *empty = unicode_get_empty();  // borrowed reference
    Py_INCREF(empty);                       // hand the caller its own reference
    return empty;
}
@ -696,12 +687,9 @@ unicode_result_ready(PyObject *unicode)
PyObject *empty = unicode_get_empty();
if (unicode != empty) {
Py_DECREF(unicode);
Py_INCREF(empty);
return empty;
}
// unicode is the empty string singleton
return unicode;
return empty;
}
#ifdef LATIN1_SINGLETONS
@ -1260,11 +1248,7 @@ _PyUnicode_New(Py_ssize_t length)
/* Optimization for empty strings */
if (length == 0) {
PyObject *empty = unicode_get_empty();
if (empty != NULL) {
Py_INCREF(empty);
return (PyUnicodeObject *)empty;
}
return (PyUnicodeObject *)unicode_new_empty();
}
/* Ensure we won't overflow the size. */
@ -1416,11 +1400,7 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
{
/* Optimization for empty strings */
if (size == 0) {
PyObject *empty = unicode_get_empty();
if (empty != NULL) {
Py_INCREF(empty);
return empty;
}
return unicode_new_empty();
}
PyObject *obj;
@ -2001,8 +1981,7 @@ unicode_dealloc(PyObject *unicode)
static int
unicode_is_singleton(PyObject *unicode)
{
struct _Py_unicode_state *state = get_unicode_state();
if (unicode == state->empty) {
if (unicode == unicode_get_empty()) {
return 1;
}
#ifdef LATIN1_SINGLETONS
@ -2059,8 +2038,6 @@ unicode_resize(PyObject **p_unicode, Py_ssize_t length)
if (length == 0) {
PyObject *empty = unicode_new_empty();
if (!empty)
return -1;
Py_SETREF(*p_unicode, empty);
return 0;
}
@ -10868,10 +10845,7 @@ replace(PyObject *self, PyObject *str1,
}
new_size = slen + n * (len2 - len1);
if (new_size == 0) {
PyObject *empty = unicode_new_empty();
if (!empty)
goto error;
u = empty;
u = unicode_new_empty();
goto done;
}
if (new_size > (PY_SSIZE_T_MAX / rkind)) {
@ -13293,13 +13267,7 @@ PyUnicode_Partition(PyObject *str_obj, PyObject *sep_obj)
len2 = PyUnicode_GET_LENGTH(sep_obj);
if (kind1 < kind2 || len1 < len2) {
PyObject *empty = unicode_get_empty(); // Borrowed reference
if (!empty) {
out = NULL;
}
else {
out = PyTuple_Pack(3, str_obj, empty, empty);
}
return out;
return PyTuple_Pack(3, str_obj, empty, empty);
}
buf1 = PyUnicode_DATA(str_obj);
buf2 = PyUnicode_DATA(sep_obj);
@ -13351,13 +13319,7 @@ PyUnicode_RPartition(PyObject *str_obj, PyObject *sep_obj)
len2 = PyUnicode_GET_LENGTH(sep_obj);
if (kind1 < kind2 || len1 < len2) {
PyObject *empty = unicode_get_empty(); // Borrowed reference
if (!empty) {
out = NULL;
}
else {
out = PyTuple_Pack(3, empty, empty, str_obj);
}
return out;
return PyTuple_Pack(3, empty, empty, str_obj);
}
buf1 = PyUnicode_DATA(str_obj);
buf2 = PyUnicode_DATA(sep_obj);
@ -15589,12 +15551,20 @@ _PyUnicode_Init(PyThreadState *tstate)
0x2029, /* PARAGRAPH SEPARATOR */
};
/* Init the implementation */
PyObject *empty = unicode_new_empty();
if (!empty) {
// Use size=1 rather than size=0, so PyUnicode_New(0, maxchar) can be
// optimized to always use state->empty without having to check if it is
// NULL or not.
PyObject *empty = PyUnicode_New(1, 0);
if (empty == NULL) {
return _PyStatus_NO_MEMORY();
}
Py_DECREF(empty);
PyUnicode_1BYTE_DATA(empty)[0] = 0;
_PyUnicode_LENGTH(empty) = 0;
assert(_PyUnicode_CheckConsistency(empty, 1));
struct _Py_unicode_state *state = &tstate->interp->unicode;
assert(state->empty == NULL);
state->empty = empty;
if (_Py_IsMainInterpreter(tstate)) {
/* initialize the linebreak bloom filter */