bpo-40521: Optimize PyUnicode_New(0, maxchar) (GH-21099)

Functions of unicodeobject.c, like PyUnicode_New(), no longer check
whether the empty Unicode singleton has been initialized: they now
assume that it is always initialized. The Unicode API must not be used
before _PyUnicode_Init() or after _PyUnicode_Fini().
This commit is contained in:
Victor Stinner 2020-06-24 00:34:07 +02:00 committed by GitHub
parent f363d0a6e9
commit 90ed8a6d71
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 25 additions and 55 deletions

View File

@ -231,28 +231,19 @@ get_unicode_state(void)
// Return a borrowed reference to the empty string singleton. // Return a borrowed reference to the empty string singleton.
// Return NULL if the singleton was not created yet.
static inline PyObject* unicode_get_empty(void) static inline PyObject* unicode_get_empty(void)
{ {
struct _Py_unicode_state *state = get_unicode_state(); struct _Py_unicode_state *state = get_unicode_state();
// unicode_get_empty() must not be called before _PyUnicode_Init()
// or after _PyUnicode_Fini()
assert(state->empty != NULL);
return state->empty; return state->empty;
} }
static inline PyObject* unicode_new_empty(void) static inline PyObject* unicode_new_empty(void)
{ {
struct _Py_unicode_state *state = get_unicode_state(); PyObject *empty = unicode_get_empty();
PyObject *empty = state->empty; Py_INCREF(empty);
if (empty != NULL) {
Py_INCREF(empty);
}
else {
empty = PyUnicode_New(0, 0);
if (empty != NULL) {
Py_INCREF(empty);
assert(_PyUnicode_CheckConsistency(empty, 1));
state->empty = empty;
}
}
return empty; return empty;
} }
@ -696,12 +687,9 @@ unicode_result_ready(PyObject *unicode)
PyObject *empty = unicode_get_empty(); PyObject *empty = unicode_get_empty();
if (unicode != empty) { if (unicode != empty) {
Py_DECREF(unicode); Py_DECREF(unicode);
Py_INCREF(empty); Py_INCREF(empty);
return empty;
} }
// unicode is the empty string singleton return empty;
return unicode;
} }
#ifdef LATIN1_SINGLETONS #ifdef LATIN1_SINGLETONS
@ -959,7 +947,7 @@ ensure_unicode(PyObject *obj)
/* Compilation of templated routines */ /* Compilation of templated routines */
#define STRINGLIB_GET_EMPTY() unicode_get_empty() #define STRINGLIB_GET_EMPTY() unicode_get_empty()
#include "stringlib/asciilib.h" #include "stringlib/asciilib.h"
#include "stringlib/fastsearch.h" #include "stringlib/fastsearch.h"
@ -1260,11 +1248,7 @@ _PyUnicode_New(Py_ssize_t length)
/* Optimization for empty strings */ /* Optimization for empty strings */
if (length == 0) { if (length == 0) {
PyObject *empty = unicode_get_empty(); return (PyUnicodeObject *)unicode_new_empty();
if (empty != NULL) {
Py_INCREF(empty);
return (PyUnicodeObject *)empty;
}
} }
/* Ensure we won't overflow the size. */ /* Ensure we won't overflow the size. */
@ -1416,11 +1400,7 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
{ {
/* Optimization for empty strings */ /* Optimization for empty strings */
if (size == 0) { if (size == 0) {
PyObject *empty = unicode_get_empty(); return unicode_new_empty();
if (empty != NULL) {
Py_INCREF(empty);
return empty;
}
} }
PyObject *obj; PyObject *obj;
@ -2001,8 +1981,7 @@ unicode_dealloc(PyObject *unicode)
static int static int
unicode_is_singleton(PyObject *unicode) unicode_is_singleton(PyObject *unicode)
{ {
struct _Py_unicode_state *state = get_unicode_state(); if (unicode == unicode_get_empty()) {
if (unicode == state->empty) {
return 1; return 1;
} }
#ifdef LATIN1_SINGLETONS #ifdef LATIN1_SINGLETONS
@ -2059,8 +2038,6 @@ unicode_resize(PyObject **p_unicode, Py_ssize_t length)
if (length == 0) { if (length == 0) {
PyObject *empty = unicode_new_empty(); PyObject *empty = unicode_new_empty();
if (!empty)
return -1;
Py_SETREF(*p_unicode, empty); Py_SETREF(*p_unicode, empty);
return 0; return 0;
} }
@ -10868,10 +10845,7 @@ replace(PyObject *self, PyObject *str1,
} }
new_size = slen + n * (len2 - len1); new_size = slen + n * (len2 - len1);
if (new_size == 0) { if (new_size == 0) {
PyObject *empty = unicode_new_empty(); u = unicode_new_empty();
if (!empty)
goto error;
u = empty;
goto done; goto done;
} }
if (new_size > (PY_SSIZE_T_MAX / rkind)) { if (new_size > (PY_SSIZE_T_MAX / rkind)) {
@ -13293,13 +13267,7 @@ PyUnicode_Partition(PyObject *str_obj, PyObject *sep_obj)
len2 = PyUnicode_GET_LENGTH(sep_obj); len2 = PyUnicode_GET_LENGTH(sep_obj);
if (kind1 < kind2 || len1 < len2) { if (kind1 < kind2 || len1 < len2) {
PyObject *empty = unicode_get_empty(); // Borrowed reference PyObject *empty = unicode_get_empty(); // Borrowed reference
if (!empty) { return PyTuple_Pack(3, str_obj, empty, empty);
out = NULL;
}
else {
out = PyTuple_Pack(3, str_obj, empty, empty);
}
return out;
} }
buf1 = PyUnicode_DATA(str_obj); buf1 = PyUnicode_DATA(str_obj);
buf2 = PyUnicode_DATA(sep_obj); buf2 = PyUnicode_DATA(sep_obj);
@ -13351,13 +13319,7 @@ PyUnicode_RPartition(PyObject *str_obj, PyObject *sep_obj)
len2 = PyUnicode_GET_LENGTH(sep_obj); len2 = PyUnicode_GET_LENGTH(sep_obj);
if (kind1 < kind2 || len1 < len2) { if (kind1 < kind2 || len1 < len2) {
PyObject *empty = unicode_get_empty(); // Borrowed reference PyObject *empty = unicode_get_empty(); // Borrowed reference
if (!empty) { return PyTuple_Pack(3, empty, empty, str_obj);
out = NULL;
}
else {
out = PyTuple_Pack(3, empty, empty, str_obj);
}
return out;
} }
buf1 = PyUnicode_DATA(str_obj); buf1 = PyUnicode_DATA(str_obj);
buf2 = PyUnicode_DATA(sep_obj); buf2 = PyUnicode_DATA(sep_obj);
@ -15589,12 +15551,20 @@ _PyUnicode_Init(PyThreadState *tstate)
0x2029, /* PARAGRAPH SEPARATOR */ 0x2029, /* PARAGRAPH SEPARATOR */
}; };
/* Init the implementation */ // Use size=1 rather than size=0, so PyUnicode_New(0, maxchar) can be
PyObject *empty = unicode_new_empty(); // optimized to always use state->empty without having to check if it is
if (!empty) { // NULL or not.
PyObject *empty = PyUnicode_New(1, 0);
if (empty == NULL) {
return _PyStatus_NO_MEMORY(); return _PyStatus_NO_MEMORY();
} }
Py_DECREF(empty); PyUnicode_1BYTE_DATA(empty)[0] = 0;
_PyUnicode_LENGTH(empty) = 0;
assert(_PyUnicode_CheckConsistency(empty, 1));
struct _Py_unicode_state *state = &tstate->interp->unicode;
assert(state->empty == NULL);
state->empty = empty;
if (_Py_IsMainInterpreter(tstate)) { if (_Py_IsMainInterpreter(tstate)) {
/* initialize the linebreak bloom filter */ /* initialize the linebreak bloom filter */