mirror of https://github.com/python/cpython
bpo-40521: Optimize PyBytes_FromStringAndSize(str, 0) (GH-21142)
Always create the empty bytes string singleton. Optimize PyBytes_FromStringAndSize(str, 0): it no longer has to check whether the empty string singleton has been created, because the singleton is always available. Add functions: _PyBytes_Init(); bytes_get_empty() and bytes_new_empty(); bytes_create_empty_string_singleton(); unicode_create_empty_string_singleton(). In _Py_unicode_state, rename the "empty" structure member to "empty_string".
This commit is contained in:
parent
0f8ec1fff0
commit
91698d8caa
|
@ -66,13 +66,13 @@ struct _Py_unicode_fs_codec {
|
||||||
};
|
};
|
||||||
|
|
||||||
struct _Py_bytes_state {
|
struct _Py_bytes_state {
|
||||||
|
PyObject *empty_string;
|
||||||
PyBytesObject *characters[256];
|
PyBytesObject *characters[256];
|
||||||
PyBytesObject *empty_string;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct _Py_unicode_state {
|
struct _Py_unicode_state {
|
||||||
// The empty Unicode object is a singleton to improve performance.
|
// The empty Unicode object is a singleton to improve performance.
|
||||||
PyObject *empty;
|
PyObject *empty_string;
|
||||||
/* Single character Unicode strings in the Latin-1 range are being
|
/* Single character Unicode strings in the Latin-1 range are being
|
||||||
shared as well. */
|
shared as well. */
|
||||||
PyObject *latin1[256];
|
PyObject *latin1[256];
|
||||||
|
|
|
@ -32,6 +32,7 @@ PyAPI_FUNC(int) _Py_IsLocaleCoercionTarget(const char *ctype_loc);
|
||||||
/* Various one-time initializers */
|
/* Various one-time initializers */
|
||||||
|
|
||||||
extern PyStatus _PyUnicode_Init(PyThreadState *tstate);
|
extern PyStatus _PyUnicode_Init(PyThreadState *tstate);
|
||||||
|
extern PyStatus _PyBytes_Init(PyThreadState *tstate);
|
||||||
extern int _PyStructSequence_Init(void);
|
extern int _PyStructSequence_Init(void);
|
||||||
extern int _PyLong_Init(PyThreadState *tstate);
|
extern int _PyLong_Init(PyThreadState *tstate);
|
||||||
extern PyStatus _PyTuple_Init(PyThreadState *tstate);
|
extern PyStatus _PyTuple_Init(PyThreadState *tstate);
|
||||||
|
|
|
@ -4,8 +4,9 @@
|
||||||
|
|
||||||
#include "Python.h"
|
#include "Python.h"
|
||||||
#include "pycore_abstract.h" // _PyIndex_Check()
|
#include "pycore_abstract.h" // _PyIndex_Check()
|
||||||
#include "pycore_bytes_methods.h"
|
#include "pycore_bytes_methods.h" // _Py_bytes_startswith()
|
||||||
#include "pycore_object.h"
|
#include "pycore_initconfig.h" // _PyStatus_OK()
|
||||||
|
#include "pycore_object.h" // _PyObject_GC_TRACK
|
||||||
#include "pycore_pymem.h" // PYMEM_CLEANBYTE
|
#include "pycore_pymem.h" // PYMEM_CLEANBYTE
|
||||||
|
|
||||||
#include "pystrhex.h"
|
#include "pystrhex.h"
|
||||||
|
@ -41,6 +42,44 @@ get_bytes_state(void)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Return a borrowed reference to the empty bytes string singleton.
|
||||||
|
static inline PyObject* bytes_get_empty(void)
|
||||||
|
{
|
||||||
|
struct _Py_bytes_state *state = get_bytes_state();
|
||||||
|
// bytes_get_empty() must not be called before _PyBytes_Init()
|
||||||
|
// or after _PyBytes_Fini()
|
||||||
|
assert(state->empty_string != NULL);
|
||||||
|
return state->empty_string;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Return a strong reference to the empty bytes string singleton.
|
||||||
|
static inline PyObject* bytes_new_empty(void)
|
||||||
|
{
|
||||||
|
PyObject *empty = bytes_get_empty();
|
||||||
|
Py_INCREF(empty);
|
||||||
|
return (PyObject *)empty;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static int
|
||||||
|
bytes_create_empty_string_singleton(struct _Py_bytes_state *state)
|
||||||
|
{
|
||||||
|
// Create the empty bytes string singleton
|
||||||
|
PyBytesObject *op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE);
|
||||||
|
if (op == NULL) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
_PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, 0);
|
||||||
|
op->ob_shash = -1;
|
||||||
|
op->ob_sval[0] = '\0';
|
||||||
|
|
||||||
|
assert(state->empty_string == NULL);
|
||||||
|
state->empty_string = (PyObject *)op;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
For PyBytes_FromString(), the parameter `str' points to a null-terminated
|
For PyBytes_FromString(), the parameter `str' points to a null-terminated
|
||||||
string containing exactly `size' bytes.
|
string containing exactly `size' bytes.
|
||||||
|
@ -70,12 +109,7 @@ _PyBytes_FromSize(Py_ssize_t size, int use_calloc)
|
||||||
assert(size >= 0);
|
assert(size >= 0);
|
||||||
|
|
||||||
if (size == 0) {
|
if (size == 0) {
|
||||||
struct _Py_bytes_state *state = get_bytes_state();
|
return bytes_new_empty();
|
||||||
op = state->empty_string;
|
|
||||||
if (op != NULL) {
|
|
||||||
Py_INCREF(op);
|
|
||||||
return (PyObject *)op;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
|
if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
|
||||||
|
@ -94,13 +128,8 @@ _PyBytes_FromSize(Py_ssize_t size, int use_calloc)
|
||||||
}
|
}
|
||||||
_PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
|
_PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
|
||||||
op->ob_shash = -1;
|
op->ob_shash = -1;
|
||||||
if (!use_calloc)
|
if (!use_calloc) {
|
||||||
op->ob_sval[size] = '\0';
|
op->ob_sval[size] = '\0';
|
||||||
/* empty byte string singleton */
|
|
||||||
if (size == 0) {
|
|
||||||
struct _Py_bytes_state *state = get_bytes_state();
|
|
||||||
Py_INCREF(op);
|
|
||||||
state->empty_string = op;
|
|
||||||
}
|
}
|
||||||
return (PyObject *) op;
|
return (PyObject *) op;
|
||||||
}
|
}
|
||||||
|
@ -122,6 +151,9 @@ PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
|
||||||
return (PyObject *)op;
|
return (PyObject *)op;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (size == 0) {
|
||||||
|
return bytes_new_empty();
|
||||||
|
}
|
||||||
|
|
||||||
op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
|
op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
|
||||||
if (op == NULL)
|
if (op == NULL)
|
||||||
|
@ -155,11 +187,7 @@ PyBytes_FromString(const char *str)
|
||||||
|
|
||||||
struct _Py_bytes_state *state = get_bytes_state();
|
struct _Py_bytes_state *state = get_bytes_state();
|
||||||
if (size == 0) {
|
if (size == 0) {
|
||||||
op = state->empty_string;
|
return bytes_new_empty();
|
||||||
if (op != NULL) {
|
|
||||||
Py_INCREF(op);
|
|
||||||
return (PyObject *)op;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else if (size == 1) {
|
else if (size == 1) {
|
||||||
op = state->characters[*str & UCHAR_MAX];
|
op = state->characters[*str & UCHAR_MAX];
|
||||||
|
@ -178,11 +206,8 @@ PyBytes_FromString(const char *str)
|
||||||
op->ob_shash = -1;
|
op->ob_shash = -1;
|
||||||
memcpy(op->ob_sval, str, size+1);
|
memcpy(op->ob_sval, str, size+1);
|
||||||
/* share short strings */
|
/* share short strings */
|
||||||
if (size == 0) {
|
if (size == 1) {
|
||||||
Py_INCREF(op);
|
assert(state->characters[*str & UCHAR_MAX] == NULL);
|
||||||
state->empty_string = op;
|
|
||||||
}
|
|
||||||
else if (size == 1) {
|
|
||||||
Py_INCREF(op);
|
Py_INCREF(op);
|
||||||
state->characters[*str & UCHAR_MAX] = op;
|
state->characters[*str & UCHAR_MAX] = op;
|
||||||
}
|
}
|
||||||
|
@ -1272,7 +1297,7 @@ PyBytes_AsStringAndSize(PyObject *obj,
|
||||||
/* -------------------------------------------------------------------- */
|
/* -------------------------------------------------------------------- */
|
||||||
/* Methods */
|
/* Methods */
|
||||||
|
|
||||||
#define STRINGLIB_GET_EMPTY() get_bytes_state()->empty_string
|
#define STRINGLIB_GET_EMPTY() bytes_get_empty()
|
||||||
|
|
||||||
#include "stringlib/stringdefs.h"
|
#include "stringlib/stringdefs.h"
|
||||||
|
|
||||||
|
@ -3053,9 +3078,9 @@ _PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
if (newsize == 0) {
|
if (newsize == 0) {
|
||||||
*pv = _PyBytes_FromSize(0, 0);
|
*pv = bytes_new_empty();
|
||||||
Py_DECREF(v);
|
Py_DECREF(v);
|
||||||
return (*pv == NULL) ? -1 : 0;
|
return 0;
|
||||||
}
|
}
|
||||||
/* XXX UNREF/NEWREF interface should be more symmetrical */
|
/* XXX UNREF/NEWREF interface should be more symmetrical */
|
||||||
#ifdef Py_REF_DEBUG
|
#ifdef Py_REF_DEBUG
|
||||||
|
@ -3084,6 +3109,18 @@ error:
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
PyStatus
|
||||||
|
_PyBytes_Init(PyThreadState *tstate)
|
||||||
|
{
|
||||||
|
struct _Py_bytes_state *state = &tstate->interp->bytes;
|
||||||
|
if (bytes_create_empty_string_singleton(state) < 0) {
|
||||||
|
return _PyStatus_NO_MEMORY();
|
||||||
|
}
|
||||||
|
return _PyStatus_OK();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
_PyBytes_Fini(PyThreadState *tstate)
|
_PyBytes_Fini(PyThreadState *tstate)
|
||||||
{
|
{
|
||||||
|
|
|
@ -41,16 +41,15 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||||
#define PY_SSIZE_T_CLEAN
|
#define PY_SSIZE_T_CLEAN
|
||||||
#include "Python.h"
|
#include "Python.h"
|
||||||
#include "pycore_abstract.h" // _PyIndex_Check()
|
#include "pycore_abstract.h" // _PyIndex_Check()
|
||||||
#include "pycore_bytes_methods.h"
|
#include "pycore_bytes_methods.h" // _Py_bytes_lower()
|
||||||
#include "pycore_fileutils.h"
|
#include "pycore_initconfig.h" // _PyStatus_OK()
|
||||||
#include "pycore_initconfig.h"
|
|
||||||
#include "pycore_interp.h" // PyInterpreterState.fs_codec
|
#include "pycore_interp.h" // PyInterpreterState.fs_codec
|
||||||
#include "pycore_object.h"
|
#include "pycore_object.h" // _PyObject_GC_TRACK()
|
||||||
#include "pycore_pathconfig.h"
|
#include "pycore_pathconfig.h" // _Py_DumpPathConfig()
|
||||||
#include "pycore_pylifecycle.h"
|
#include "pycore_pylifecycle.h" // _Py_SetFileSystemEncoding()
|
||||||
#include "pycore_pystate.h" // _PyInterpreterState_GET()
|
#include "pycore_pystate.h" // _PyInterpreterState_GET()
|
||||||
#include "ucnhash.h"
|
#include "ucnhash.h" // _PyUnicode_Name_CAPI
|
||||||
#include "stringlib/eq.h"
|
#include "stringlib/eq.h" // unicode_eq()
|
||||||
|
|
||||||
#ifdef MS_WINDOWS
|
#ifdef MS_WINDOWS
|
||||||
#include <windows.h>
|
#include <windows.h>
|
||||||
|
@ -236,10 +235,12 @@ static inline PyObject* unicode_get_empty(void)
|
||||||
struct _Py_unicode_state *state = get_unicode_state();
|
struct _Py_unicode_state *state = get_unicode_state();
|
||||||
// unicode_get_empty() must not be called before _PyUnicode_Init()
|
// unicode_get_empty() must not be called before _PyUnicode_Init()
|
||||||
// or after _PyUnicode_Fini()
|
// or after _PyUnicode_Fini()
|
||||||
assert(state->empty != NULL);
|
assert(state->empty_string != NULL);
|
||||||
return state->empty;
|
return state->empty_string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Return a strong reference to the empty string singleton.
|
||||||
static inline PyObject* unicode_new_empty(void)
|
static inline PyObject* unicode_new_empty(void)
|
||||||
{
|
{
|
||||||
PyObject *empty = unicode_get_empty();
|
PyObject *empty = unicode_get_empty();
|
||||||
|
@ -1385,6 +1386,26 @@ _PyUnicode_Dump(PyObject *op)
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
static int
|
||||||
|
unicode_create_empty_string_singleton(struct _Py_unicode_state *state)
|
||||||
|
{
|
||||||
|
// Use size=1 rather than size=0, so PyUnicode_New(0, maxchar) can be
|
||||||
|
// optimized to always use state->empty_string without having to check if
|
||||||
|
// it is NULL or not.
|
||||||
|
PyObject *empty = PyUnicode_New(1, 0);
|
||||||
|
if (empty == NULL) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
PyUnicode_1BYTE_DATA(empty)[0] = 0;
|
||||||
|
_PyUnicode_LENGTH(empty) = 0;
|
||||||
|
assert(_PyUnicode_CheckConsistency(empty, 1));
|
||||||
|
|
||||||
|
assert(state->empty_string == NULL);
|
||||||
|
state->empty_string = empty;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
PyObject *
|
PyObject *
|
||||||
PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
|
PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
|
||||||
{
|
{
|
||||||
|
@ -1972,7 +1993,7 @@ static int
|
||||||
unicode_is_singleton(PyObject *unicode)
|
unicode_is_singleton(PyObject *unicode)
|
||||||
{
|
{
|
||||||
struct _Py_unicode_state *state = get_unicode_state();
|
struct _Py_unicode_state *state = get_unicode_state();
|
||||||
if (unicode == state->empty) {
|
if (unicode == state->empty_string) {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
PyASCIIObject *ascii = (PyASCIIObject *)unicode;
|
PyASCIIObject *ascii = (PyASCIIObject *)unicode;
|
||||||
|
@ -15542,20 +15563,10 @@ _PyUnicode_Init(PyThreadState *tstate)
|
||||||
0x2029, /* PARAGRAPH SEPARATOR */
|
0x2029, /* PARAGRAPH SEPARATOR */
|
||||||
};
|
};
|
||||||
|
|
||||||
// Use size=1 rather than size=0, so PyUnicode_New(0, maxchar) can be
|
struct _Py_unicode_state *state = &tstate->interp->unicode;
|
||||||
// optimized to always use state->empty without having to check if it is
|
if (unicode_create_empty_string_singleton(state) < 0) {
|
||||||
// NULL or not.
|
|
||||||
PyObject *empty = PyUnicode_New(1, 0);
|
|
||||||
if (empty == NULL) {
|
|
||||||
return _PyStatus_NO_MEMORY();
|
return _PyStatus_NO_MEMORY();
|
||||||
}
|
}
|
||||||
PyUnicode_1BYTE_DATA(empty)[0] = 0;
|
|
||||||
_PyUnicode_LENGTH(empty) = 0;
|
|
||||||
assert(_PyUnicode_CheckConsistency(empty, 1));
|
|
||||||
|
|
||||||
struct _Py_unicode_state *state = &tstate->interp->unicode;
|
|
||||||
assert(state->empty == NULL);
|
|
||||||
state->empty = empty;
|
|
||||||
|
|
||||||
if (_Py_IsMainInterpreter(tstate)) {
|
if (_Py_IsMainInterpreter(tstate)) {
|
||||||
/* initialize the linebreak bloom filter */
|
/* initialize the linebreak bloom filter */
|
||||||
|
@ -16223,7 +16234,7 @@ _PyUnicode_Fini(PyThreadState *tstate)
|
||||||
#endif /* __INSURE__ */
|
#endif /* __INSURE__ */
|
||||||
}
|
}
|
||||||
|
|
||||||
Py_CLEAR(state->empty);
|
Py_CLEAR(state->empty_string);
|
||||||
|
|
||||||
for (Py_ssize_t i = 0; i < 256; i++) {
|
for (Py_ssize_t i = 0; i < 256; i++) {
|
||||||
Py_CLEAR(state->latin1[i]);
|
Py_CLEAR(state->latin1[i]);
|
||||||
|
|
|
@ -607,6 +607,11 @@ pycore_init_types(PyThreadState *tstate)
|
||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
status = _PyBytes_Init(tstate);
|
||||||
|
if (_PyStatus_EXCEPTION(status)) {
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
status = _PyExc_Init(tstate);
|
status = _PyExc_Init(tstate);
|
||||||
if (_PyStatus_EXCEPTION(status)) {
|
if (_PyStatus_EXCEPTION(status)) {
|
||||||
return status;
|
return status;
|
||||||
|
|
Loading…
Reference in New Issue