bpo-40521: Make bytes singletons per interpreter (GH-21074)

Each interpreter now has its own empty bytes string and single byte
character singletons.

Replace STRINGLIB_EMPTY macro with STRINGLIB_GET_EMPTY() macro.
This commit is contained in:
Victor Stinner 2020-06-23 15:54:35 +02:00 committed by GitHub
parent 32f2eda859
commit c41eed1a87
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 96 additions and 53 deletions

View File

@ -65,6 +65,11 @@ struct _Py_unicode_fs_codec {
_Py_error_handler error_handler;
};
/* Cached bytes singletons. An instance is embedded in struct _is as its
   `bytes` member, so the cache is not shared through file-level globals. */
struct _Py_bytes_state {
/* Cached one-byte bytes objects, indexed by the byte value
   (`*str & UCHAR_MAX`). An entry may be NULL when that byte has not
   been cached yet. */
PyBytesObject *characters[256];
/* Cached empty bytes object; may be NULL when it has not been created yet. */
PyBytesObject *empty_string;
};
/* Unicode state embedded in struct _is as its `unicode` member. */
struct _Py_unicode_state {
struct _Py_unicode_fs_codec fs_codec;
};
@ -233,6 +238,7 @@ struct _is {
*/
PyLongObject* small_ints[_PY_NSMALLNEGINTS + _PY_NSMALLPOSINTS];
#endif
struct _Py_bytes_state bytes;
struct _Py_unicode_state unicode;
struct _Py_float_state float_state;
/* Using a cache is very effective since typically only a single slice is

View File

@ -63,7 +63,7 @@ extern void _PyDict_Fini(PyThreadState *tstate);
extern void _PyTuple_Fini(PyThreadState *tstate);
extern void _PyList_Fini(PyThreadState *tstate);
extern void _PySet_Fini(PyThreadState *tstate);
extern void _PyBytes_Fini(void);
extern void _PyBytes_Fini(PyThreadState *tstate);
extern void _PyFloat_Fini(PyThreadState *tstate);
extern void _PySlice_Fini(PyThreadState *tstate);
extern void _PyAsyncGen_Fini(PyThreadState *tstate);

View File

@ -1,5 +1,9 @@
The tuple free lists, the empty tuple singleton, the list free list, the empty
frozenset singleton, the float free list, the slice cache, the dict free lists,
the frame free list, the asynchronous generator free lists, and the context
free list are no longer shared by all interpreters: each interpreter now has
its own free lists and caches.
Each interpreter now has its own free lists, singletons and caches:
* Free lists: float, tuple, list, dict, frame, context,
asynchronous generator.
* Singletons: empty tuple, empty frozenset, empty bytes string,
single byte character.
* Slice cache.
They are no longer shared by all interpreters.

View File

@ -18,9 +18,6 @@ class bytes "PyBytesObject *" "&PyBytes_Type"
#include "clinic/bytesobject.c.h"
static PyBytesObject *characters[UCHAR_MAX + 1];
static PyBytesObject *nullstring;
_Py_IDENTIFIER(__bytes__);
/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
@ -35,6 +32,15 @@ _Py_IDENTIFIER(__bytes__);
Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
char *str);
static struct _Py_bytes_state*
get_bytes_state(void)
{
PyInterpreterState *interp = _PyInterpreterState_GET();
return &interp->bytes;
}
/*
For PyBytes_FromString(), the parameter `str' points to a null-terminated
string containing exactly `size' bytes.
@ -63,9 +69,13 @@ _PyBytes_FromSize(Py_ssize_t size, int use_calloc)
PyBytesObject *op;
assert(size >= 0);
if (size == 0 && (op = nullstring) != NULL) {
Py_INCREF(op);
return (PyObject *)op;
if (size == 0) {
struct _Py_bytes_state *state = get_bytes_state();
op = state->empty_string;
if (op != NULL) {
Py_INCREF(op);
return (PyObject *)op;
}
}
if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
@ -88,8 +98,9 @@ _PyBytes_FromSize(Py_ssize_t size, int use_calloc)
op->ob_sval[size] = '\0';
/* empty byte string singleton */
if (size == 0) {
nullstring = op;
struct _Py_bytes_state *state = get_bytes_state();
Py_INCREF(op);
state->empty_string = op;
}
return (PyObject *) op;
}
@ -103,11 +114,13 @@ PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
"Negative size passed to PyBytes_FromStringAndSize");
return NULL;
}
if (size == 1 && str != NULL &&
(op = characters[*str & UCHAR_MAX]) != NULL)
{
Py_INCREF(op);
return (PyObject *)op;
if (size == 1 && str != NULL) {
struct _Py_bytes_state *state = get_bytes_state();
op = state->characters[*str & UCHAR_MAX];
if (op != NULL) {
Py_INCREF(op);
return (PyObject *)op;
}
}
op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
@ -119,8 +132,9 @@ PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
memcpy(op->ob_sval, str, size);
/* share short strings */
if (size == 1) {
characters[*str & UCHAR_MAX] = op;
struct _Py_bytes_state *state = get_bytes_state();
Py_INCREF(op);
state->characters[*str & UCHAR_MAX] = op;
}
return (PyObject *) op;
}
@ -138,13 +152,21 @@ PyBytes_FromString(const char *str)
"byte string is too long");
return NULL;
}
if (size == 0 && (op = nullstring) != NULL) {
Py_INCREF(op);
return (PyObject *)op;
struct _Py_bytes_state *state = get_bytes_state();
if (size == 0) {
op = state->empty_string;
if (op != NULL) {
Py_INCREF(op);
return (PyObject *)op;
}
}
if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Py_INCREF(op);
return (PyObject *)op;
else if (size == 1) {
op = state->characters[*str & UCHAR_MAX];
if (op != NULL) {
Py_INCREF(op);
return (PyObject *)op;
}
}
/* Inline PyObject_NewVar */
@ -157,11 +179,12 @@ PyBytes_FromString(const char *str)
memcpy(op->ob_sval, str, size+1);
/* share short strings */
if (size == 0) {
nullstring = op;
Py_INCREF(op);
} else if (size == 1) {
characters[*str & UCHAR_MAX] = op;
state->empty_string = op;
}
else if (size == 1) {
Py_INCREF(op);
state->characters[*str & UCHAR_MAX] = op;
}
return (PyObject *) op;
}
@ -1249,6 +1272,8 @@ PyBytes_AsStringAndSize(PyObject *obj,
/* -------------------------------------------------------------------- */
/* Methods */
#define STRINGLIB_GET_EMPTY() get_bytes_state()->empty_string
#include "stringlib/stringdefs.h"
#include "stringlib/fastsearch.h"
@ -1261,6 +1286,8 @@ PyBytes_AsStringAndSize(PyObject *obj,
#include "stringlib/transmogrify.h"
#undef STRINGLIB_GET_EMPTY
PyObject *
PyBytes_Repr(PyObject *obj, int smartquotes)
{
@ -3058,12 +3085,13 @@ error:
}
void
_PyBytes_Fini(void)
_PyBytes_Fini(PyThreadState *tstate)
{
int i;
for (i = 0; i < UCHAR_MAX + 1; i++)
Py_CLEAR(characters[i]);
Py_CLEAR(nullstring);
struct _Py_bytes_state* state = &tstate->interp->bytes;
for (int i = 0; i < UCHAR_MAX + 1; i++) {
Py_CLEAR(state->characters[i]);
}
Py_CLEAR(state->empty_string);
}
/*********************** Bytes Iterator ****************************/

View File

@ -11,10 +11,10 @@ STRINGLIB_CHAR
the type used to hold a character (char or Py_UNICODE)
STRINGLIB_EMPTY
STRINGLIB_GET_EMPTY()
a PyObject representing the empty string, only to be used if
STRINGLIB_MUTABLE is 0
returns a PyObject representing the empty string, only to be used if
STRINGLIB_MUTABLE is 0. It must not be NULL.
Py_ssize_t STRINGLIB_LEN(PyObject*)

View File

@ -11,7 +11,7 @@
#define STRINGLIB_CHAR Py_UCS1
#define STRINGLIB_TYPE_NAME "unicode"
#define STRINGLIB_PARSE_CODE "U"
#define STRINGLIB_EMPTY unicode_empty
#define STRINGLIB_GET_EMPTY() unicode_empty
#define STRINGLIB_ISSPACE Py_UNICODE_ISSPACE
#define STRINGLIB_ISLINEBREAK BLOOM_LINEBREAK
#define STRINGLIB_ISDECIMAL Py_UNICODE_ISDECIMAL

View File

@ -37,10 +37,12 @@ STRINGLIB(partition)(PyObject* str_obj,
#else
Py_INCREF(str_obj);
PyTuple_SET_ITEM(out, 0, (PyObject*) str_obj);
Py_INCREF(STRINGLIB_EMPTY);
PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY);
Py_INCREF(STRINGLIB_EMPTY);
PyTuple_SET_ITEM(out, 2, (PyObject*) STRINGLIB_EMPTY);
PyObject *empty = (PyObject*)STRINGLIB_GET_EMPTY();
assert(empty != NULL);
Py_INCREF(empty);
PyTuple_SET_ITEM(out, 1, empty);
Py_INCREF(empty);
PyTuple_SET_ITEM(out, 2, empty);
#endif
return out;
}
@ -90,10 +92,12 @@ STRINGLIB(rpartition)(PyObject* str_obj,
return NULL;
}
#else
Py_INCREF(STRINGLIB_EMPTY);
PyTuple_SET_ITEM(out, 0, (PyObject*) STRINGLIB_EMPTY);
Py_INCREF(STRINGLIB_EMPTY);
PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY);
PyObject *empty = (PyObject*)STRINGLIB_GET_EMPTY();
assert(empty != NULL);
Py_INCREF(empty);
PyTuple_SET_ITEM(out, 0, empty);
Py_INCREF(empty);
PyTuple_SET_ITEM(out, 1, empty);
Py_INCREF(str_obj);
PyTuple_SET_ITEM(out, 2, (PyObject*) str_obj);
#endif

View File

@ -1,6 +1,10 @@
#ifndef STRINGLIB_STRINGDEFS_H
#define STRINGLIB_STRINGDEFS_H
#ifndef STRINGLIB_GET_EMPTY
# error "STRINGLIB_GET_EMPTY macro must be defined"
#endif
/* this is sort of a hack. there's at least one place (formatting
floats) where some stringlib code takes a different path if it's
compiled as unicode. */
@ -13,7 +17,6 @@
#define STRINGLIB_CHAR char
#define STRINGLIB_TYPE_NAME "string"
#define STRINGLIB_PARSE_CODE "S"
#define STRINGLIB_EMPTY nullstring
#define STRINGLIB_ISSPACE Py_ISSPACE
#define STRINGLIB_ISLINEBREAK(x) ((x == '\n') || (x == '\r'))
#define STRINGLIB_ISDECIMAL(x) ((x >= '0') && (x <= '9'))

View File

@ -11,7 +11,7 @@
#define STRINGLIB_CHAR Py_UCS1
#define STRINGLIB_TYPE_NAME "unicode"
#define STRINGLIB_PARSE_CODE "U"
#define STRINGLIB_EMPTY unicode_empty
#define STRINGLIB_GET_EMPTY() unicode_empty
#define STRINGLIB_ISSPACE Py_UNICODE_ISSPACE
#define STRINGLIB_ISLINEBREAK BLOOM_LINEBREAK
#define STRINGLIB_ISDECIMAL Py_UNICODE_ISDECIMAL

View File

@ -11,7 +11,7 @@
#define STRINGLIB_CHAR Py_UCS2
#define STRINGLIB_TYPE_NAME "unicode"
#define STRINGLIB_PARSE_CODE "U"
#define STRINGLIB_EMPTY unicode_empty
#define STRINGLIB_GET_EMPTY() unicode_empty
#define STRINGLIB_ISSPACE Py_UNICODE_ISSPACE
#define STRINGLIB_ISLINEBREAK BLOOM_LINEBREAK
#define STRINGLIB_ISDECIMAL Py_UNICODE_ISDECIMAL

View File

@ -11,7 +11,7 @@
#define STRINGLIB_CHAR Py_UCS4
#define STRINGLIB_TYPE_NAME "unicode"
#define STRINGLIB_PARSE_CODE "U"
#define STRINGLIB_EMPTY unicode_empty
#define STRINGLIB_GET_EMPTY() unicode_empty
#define STRINGLIB_ISSPACE Py_UNICODE_ISSPACE
#define STRINGLIB_ISLINEBREAK BLOOM_LINEBREAK
#define STRINGLIB_ISDECIMAL Py_UNICODE_ISDECIMAL

View File

@ -13,7 +13,7 @@
#define STRINGLIB_CHAR Py_UNICODE
#define STRINGLIB_TYPE_NAME "unicode"
#define STRINGLIB_PARSE_CODE "U"
#define STRINGLIB_EMPTY unicode_empty
#define STRINGLIB_GET_EMPTY() unicode_empty
#define STRINGLIB_ISSPACE Py_UNICODE_ISSPACE
#define STRINGLIB_ISLINEBREAK BLOOM_LINEBREAK
#define STRINGLIB_ISDECIMAL Py_UNICODE_ISDECIMAL

View File

@ -1262,9 +1262,7 @@ finalize_interp_types(PyThreadState *tstate, int is_main_interp)
_PySlice_Fini(tstate);
if (is_main_interp) {
_PyBytes_Fini();
}
_PyBytes_Fini(tstate);
_PyUnicode_Fini(tstate);
_PyFloat_Fini(tstate);
_PyLong_Fini(tstate);