bpo-45953: Statically allocate and initialize global bytes objects. (gh-30096)

The empty bytes object (b'') and the 256 one-character bytes objects were allocated at runtime init.  Now we statically allocate and initialize them.

https://bugs.python.org/issue45953
This commit is contained in:
Eric Snow 2022-01-11 09:37:24 -07:00 committed by GitHub
parent 6f05e1ec19
commit cf496d657a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 294 additions and 96 deletions

View File

@ -11,17 +11,7 @@ extern "C" {
/* runtime lifecycle */
extern PyStatus _PyBytes_InitGlobalObjects(PyInterpreterState *);
extern PyStatus _PyBytes_InitTypes(PyInterpreterState *);
extern void _PyBytes_Fini(PyInterpreterState *);
/* other API */
/* Cache of shared bytes objects; an instance is embedded in the
   interpreter state as its `bytes` member. */
struct _Py_bytes_state {
/* The shared empty bytes object; NULL until it has been created. */
PyObject *empty_string;
/* Shared one-byte bytes objects, indexed by byte value.  An entry stays
   NULL until a bytes object for that value has been stored in it. */
PyBytesObject *characters[256];
};
#ifdef __cplusplus

View File

@ -34,6 +34,20 @@ extern "C" {
}
/* bytes objects */
/* Brace-enclosed static initializer for a PyBytesObject.
 *
 *   CH   value stored in the first element of ob_sval
 *   LEN  size argument forwarded to _PyVarObject_IMMORTAL_INIT()
 *        (presumably the object's ob_size -- confirm against that macro)
 *
 * ob_shash starts at -1, the same value the runtime-allocated bytes objects
 * are given when they are created.
 */
#define _PyBytes_SIMPLE_INIT(CH, LEN) \
{ \
_PyVarObject_IMMORTAL_INIT(&PyBytes_Type, LEN), \
.ob_shash = -1, \
.ob_sval = { CH }, \
}
/* Static initializer for one element of the `bytes_characters` array.
 *
 * The outer braces initialize the wrapper struct { ob, eos }; the inner
 * _PyBytes_SIMPLE_INIT(CH, 1) initializes its `ob` member.  `eos` has no
 * explicit initializer, so it is zero-initialized.
 * NOTE(review): `eos` presumably serves as the terminating NUL that follows
 * the single payload byte -- confirm.
 */
#define _PyBytes_CHAR_INIT(CH) \
{ \
_PyBytes_SIMPLE_INIT(CH, 1) \
}
/**********************
* the global objects *
**********************/
@ -54,6 +68,12 @@ struct _Py_global_objects {
* -_PY_NSMALLNEGINTS (inclusive) to _PY_NSMALLPOSINTS (exclusive).
*/
PyLongObject small_ints[_PY_NSMALLNEGINTS + _PY_NSMALLPOSINTS];
PyBytesObject bytes_empty;
struct {
PyBytesObject ob;
char eos;
} bytes_characters[256];
} singletons;
};
@ -323,6 +343,266 @@ struct _Py_global_objects {
_PyLong_DIGIT_INIT(255), \
_PyLong_DIGIT_INIT(256), \
}, \
\
.bytes_empty = _PyBytes_SIMPLE_INIT(0, 0), \
.bytes_characters = { \
_PyBytes_CHAR_INIT(0), \
_PyBytes_CHAR_INIT(1), \
_PyBytes_CHAR_INIT(2), \
_PyBytes_CHAR_INIT(3), \
_PyBytes_CHAR_INIT(4), \
_PyBytes_CHAR_INIT(5), \
_PyBytes_CHAR_INIT(6), \
_PyBytes_CHAR_INIT(7), \
_PyBytes_CHAR_INIT(8), \
_PyBytes_CHAR_INIT(9), \
_PyBytes_CHAR_INIT(10), \
_PyBytes_CHAR_INIT(11), \
_PyBytes_CHAR_INIT(12), \
_PyBytes_CHAR_INIT(13), \
_PyBytes_CHAR_INIT(14), \
_PyBytes_CHAR_INIT(15), \
_PyBytes_CHAR_INIT(16), \
_PyBytes_CHAR_INIT(17), \
_PyBytes_CHAR_INIT(18), \
_PyBytes_CHAR_INIT(19), \
_PyBytes_CHAR_INIT(20), \
_PyBytes_CHAR_INIT(21), \
_PyBytes_CHAR_INIT(22), \
_PyBytes_CHAR_INIT(23), \
_PyBytes_CHAR_INIT(24), \
_PyBytes_CHAR_INIT(25), \
_PyBytes_CHAR_INIT(26), \
_PyBytes_CHAR_INIT(27), \
_PyBytes_CHAR_INIT(28), \
_PyBytes_CHAR_INIT(29), \
_PyBytes_CHAR_INIT(30), \
_PyBytes_CHAR_INIT(31), \
_PyBytes_CHAR_INIT(32), \
_PyBytes_CHAR_INIT(33), \
_PyBytes_CHAR_INIT(34), \
_PyBytes_CHAR_INIT(35), \
_PyBytes_CHAR_INIT(36), \
_PyBytes_CHAR_INIT(37), \
_PyBytes_CHAR_INIT(38), \
_PyBytes_CHAR_INIT(39), \
_PyBytes_CHAR_INIT(40), \
_PyBytes_CHAR_INIT(41), \
_PyBytes_CHAR_INIT(42), \
_PyBytes_CHAR_INIT(43), \
_PyBytes_CHAR_INIT(44), \
_PyBytes_CHAR_INIT(45), \
_PyBytes_CHAR_INIT(46), \
_PyBytes_CHAR_INIT(47), \
_PyBytes_CHAR_INIT(48), \
_PyBytes_CHAR_INIT(49), \
_PyBytes_CHAR_INIT(50), \
_PyBytes_CHAR_INIT(51), \
_PyBytes_CHAR_INIT(52), \
_PyBytes_CHAR_INIT(53), \
_PyBytes_CHAR_INIT(54), \
_PyBytes_CHAR_INIT(55), \
_PyBytes_CHAR_INIT(56), \
_PyBytes_CHAR_INIT(57), \
_PyBytes_CHAR_INIT(58), \
_PyBytes_CHAR_INIT(59), \
_PyBytes_CHAR_INIT(60), \
_PyBytes_CHAR_INIT(61), \
_PyBytes_CHAR_INIT(62), \
_PyBytes_CHAR_INIT(63), \
_PyBytes_CHAR_INIT(64), \
_PyBytes_CHAR_INIT(65), \
_PyBytes_CHAR_INIT(66), \
_PyBytes_CHAR_INIT(67), \
_PyBytes_CHAR_INIT(68), \
_PyBytes_CHAR_INIT(69), \
_PyBytes_CHAR_INIT(70), \
_PyBytes_CHAR_INIT(71), \
_PyBytes_CHAR_INIT(72), \
_PyBytes_CHAR_INIT(73), \
_PyBytes_CHAR_INIT(74), \
_PyBytes_CHAR_INIT(75), \
_PyBytes_CHAR_INIT(76), \
_PyBytes_CHAR_INIT(77), \
_PyBytes_CHAR_INIT(78), \
_PyBytes_CHAR_INIT(79), \
_PyBytes_CHAR_INIT(80), \
_PyBytes_CHAR_INIT(81), \
_PyBytes_CHAR_INIT(82), \
_PyBytes_CHAR_INIT(83), \
_PyBytes_CHAR_INIT(84), \
_PyBytes_CHAR_INIT(85), \
_PyBytes_CHAR_INIT(86), \
_PyBytes_CHAR_INIT(87), \
_PyBytes_CHAR_INIT(88), \
_PyBytes_CHAR_INIT(89), \
_PyBytes_CHAR_INIT(90), \
_PyBytes_CHAR_INIT(91), \
_PyBytes_CHAR_INIT(92), \
_PyBytes_CHAR_INIT(93), \
_PyBytes_CHAR_INIT(94), \
_PyBytes_CHAR_INIT(95), \
_PyBytes_CHAR_INIT(96), \
_PyBytes_CHAR_INIT(97), \
_PyBytes_CHAR_INIT(98), \
_PyBytes_CHAR_INIT(99), \
_PyBytes_CHAR_INIT(100), \
_PyBytes_CHAR_INIT(101), \
_PyBytes_CHAR_INIT(102), \
_PyBytes_CHAR_INIT(103), \
_PyBytes_CHAR_INIT(104), \
_PyBytes_CHAR_INIT(105), \
_PyBytes_CHAR_INIT(106), \
_PyBytes_CHAR_INIT(107), \
_PyBytes_CHAR_INIT(108), \
_PyBytes_CHAR_INIT(109), \
_PyBytes_CHAR_INIT(110), \
_PyBytes_CHAR_INIT(111), \
_PyBytes_CHAR_INIT(112), \
_PyBytes_CHAR_INIT(113), \
_PyBytes_CHAR_INIT(114), \
_PyBytes_CHAR_INIT(115), \
_PyBytes_CHAR_INIT(116), \
_PyBytes_CHAR_INIT(117), \
_PyBytes_CHAR_INIT(118), \
_PyBytes_CHAR_INIT(119), \
_PyBytes_CHAR_INIT(120), \
_PyBytes_CHAR_INIT(121), \
_PyBytes_CHAR_INIT(122), \
_PyBytes_CHAR_INIT(123), \
_PyBytes_CHAR_INIT(124), \
_PyBytes_CHAR_INIT(125), \
_PyBytes_CHAR_INIT(126), \
_PyBytes_CHAR_INIT(127), \
_PyBytes_CHAR_INIT(128), \
_PyBytes_CHAR_INIT(129), \
_PyBytes_CHAR_INIT(130), \
_PyBytes_CHAR_INIT(131), \
_PyBytes_CHAR_INIT(132), \
_PyBytes_CHAR_INIT(133), \
_PyBytes_CHAR_INIT(134), \
_PyBytes_CHAR_INIT(135), \
_PyBytes_CHAR_INIT(136), \
_PyBytes_CHAR_INIT(137), \
_PyBytes_CHAR_INIT(138), \
_PyBytes_CHAR_INIT(139), \
_PyBytes_CHAR_INIT(140), \
_PyBytes_CHAR_INIT(141), \
_PyBytes_CHAR_INIT(142), \
_PyBytes_CHAR_INIT(143), \
_PyBytes_CHAR_INIT(144), \
_PyBytes_CHAR_INIT(145), \
_PyBytes_CHAR_INIT(146), \
_PyBytes_CHAR_INIT(147), \
_PyBytes_CHAR_INIT(148), \
_PyBytes_CHAR_INIT(149), \
_PyBytes_CHAR_INIT(150), \
_PyBytes_CHAR_INIT(151), \
_PyBytes_CHAR_INIT(152), \
_PyBytes_CHAR_INIT(153), \
_PyBytes_CHAR_INIT(154), \
_PyBytes_CHAR_INIT(155), \
_PyBytes_CHAR_INIT(156), \
_PyBytes_CHAR_INIT(157), \
_PyBytes_CHAR_INIT(158), \
_PyBytes_CHAR_INIT(159), \
_PyBytes_CHAR_INIT(160), \
_PyBytes_CHAR_INIT(161), \
_PyBytes_CHAR_INIT(162), \
_PyBytes_CHAR_INIT(163), \
_PyBytes_CHAR_INIT(164), \
_PyBytes_CHAR_INIT(165), \
_PyBytes_CHAR_INIT(166), \
_PyBytes_CHAR_INIT(167), \
_PyBytes_CHAR_INIT(168), \
_PyBytes_CHAR_INIT(169), \
_PyBytes_CHAR_INIT(170), \
_PyBytes_CHAR_INIT(171), \
_PyBytes_CHAR_INIT(172), \
_PyBytes_CHAR_INIT(173), \
_PyBytes_CHAR_INIT(174), \
_PyBytes_CHAR_INIT(175), \
_PyBytes_CHAR_INIT(176), \
_PyBytes_CHAR_INIT(177), \
_PyBytes_CHAR_INIT(178), \
_PyBytes_CHAR_INIT(179), \
_PyBytes_CHAR_INIT(180), \
_PyBytes_CHAR_INIT(181), \
_PyBytes_CHAR_INIT(182), \
_PyBytes_CHAR_INIT(183), \
_PyBytes_CHAR_INIT(184), \
_PyBytes_CHAR_INIT(185), \
_PyBytes_CHAR_INIT(186), \
_PyBytes_CHAR_INIT(187), \
_PyBytes_CHAR_INIT(188), \
_PyBytes_CHAR_INIT(189), \
_PyBytes_CHAR_INIT(190), \
_PyBytes_CHAR_INIT(191), \
_PyBytes_CHAR_INIT(192), \
_PyBytes_CHAR_INIT(193), \
_PyBytes_CHAR_INIT(194), \
_PyBytes_CHAR_INIT(195), \
_PyBytes_CHAR_INIT(196), \
_PyBytes_CHAR_INIT(197), \
_PyBytes_CHAR_INIT(198), \
_PyBytes_CHAR_INIT(199), \
_PyBytes_CHAR_INIT(200), \
_PyBytes_CHAR_INIT(201), \
_PyBytes_CHAR_INIT(202), \
_PyBytes_CHAR_INIT(203), \
_PyBytes_CHAR_INIT(204), \
_PyBytes_CHAR_INIT(205), \
_PyBytes_CHAR_INIT(206), \
_PyBytes_CHAR_INIT(207), \
_PyBytes_CHAR_INIT(208), \
_PyBytes_CHAR_INIT(209), \
_PyBytes_CHAR_INIT(210), \
_PyBytes_CHAR_INIT(211), \
_PyBytes_CHAR_INIT(212), \
_PyBytes_CHAR_INIT(213), \
_PyBytes_CHAR_INIT(214), \
_PyBytes_CHAR_INIT(215), \
_PyBytes_CHAR_INIT(216), \
_PyBytes_CHAR_INIT(217), \
_PyBytes_CHAR_INIT(218), \
_PyBytes_CHAR_INIT(219), \
_PyBytes_CHAR_INIT(220), \
_PyBytes_CHAR_INIT(221), \
_PyBytes_CHAR_INIT(222), \
_PyBytes_CHAR_INIT(223), \
_PyBytes_CHAR_INIT(224), \
_PyBytes_CHAR_INIT(225), \
_PyBytes_CHAR_INIT(226), \
_PyBytes_CHAR_INIT(227), \
_PyBytes_CHAR_INIT(228), \
_PyBytes_CHAR_INIT(229), \
_PyBytes_CHAR_INIT(230), \
_PyBytes_CHAR_INIT(231), \
_PyBytes_CHAR_INIT(232), \
_PyBytes_CHAR_INIT(233), \
_PyBytes_CHAR_INIT(234), \
_PyBytes_CHAR_INIT(235), \
_PyBytes_CHAR_INIT(236), \
_PyBytes_CHAR_INIT(237), \
_PyBytes_CHAR_INIT(238), \
_PyBytes_CHAR_INIT(239), \
_PyBytes_CHAR_INIT(240), \
_PyBytes_CHAR_INIT(241), \
_PyBytes_CHAR_INIT(242), \
_PyBytes_CHAR_INIT(243), \
_PyBytes_CHAR_INIT(244), \
_PyBytes_CHAR_INIT(245), \
_PyBytes_CHAR_INIT(246), \
_PyBytes_CHAR_INIT(247), \
_PyBytes_CHAR_INIT(248), \
_PyBytes_CHAR_INIT(249), \
_PyBytes_CHAR_INIT(250), \
_PyBytes_CHAR_INIT(251), \
_PyBytes_CHAR_INIT(252), \
_PyBytes_CHAR_INIT(253), \
_PyBytes_CHAR_INIT(254), \
_PyBytes_CHAR_INIT(255), \
}, \
}, \
}

View File

@ -10,7 +10,6 @@ extern "C" {
#include "pycore_atomic.h" // _Py_atomic_address
#include "pycore_ast_state.h" // struct ast_state
#include "pycore_bytesobject.h" // struct _Py_bytes_state
#include "pycore_context.h" // struct _Py_context_state
#include "pycore_dict.h" // struct _Py_dict_state
#include "pycore_exceptions.h" // struct _Py_exc_state
@ -152,7 +151,6 @@ struct _is {
PyObject *audit_hooks;
struct _Py_bytes_state bytes;
struct _Py_unicode_state unicode;
struct _Py_float_state float_state;
/* Using a cache is very effective since typically only a single slice is

View File

@ -5,9 +5,9 @@
#include "Python.h"
#include "pycore_abstract.h" // _PyIndex_Check()
#include "pycore_bytes_methods.h" // _Py_bytes_startswith()
#include "pycore_bytesobject.h" // struct _Py_bytes_state
#include "pycore_call.h" // _PyObject_CallNoArgs()
#include "pycore_format.h" // F_LJUST
#include "pycore_global_objects.h" // _Py_GET_GLOBAL_OBJECT()
#include "pycore_initconfig.h" // _PyStatus_OK()
#include "pycore_long.h" // _PyLong_DigitValue
#include "pycore_object.h" // _PyObject_GC_TRACK
@ -38,49 +38,24 @@ Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
char *str);
static struct _Py_bytes_state*
get_bytes_state(void)
{
PyInterpreterState *interp = _PyInterpreterState_GET();
return &interp->bytes;
}
/* The statically allocated table of one-byte bytes singletons. */
#define CHARACTERS _Py_SINGLETON(bytes_characters)
/* Pointer to the singleton for byte value `ch`.
 *
 * Each table element is a wrapper struct whose first member is the
 * PyBytesObject, so the element's address is cast to PyBytesObject *.
 * The expansion deliberately has no trailing semicolon: the macro is an
 * expression and must stay usable inside larger expressions (call
 * arguments, comparisons, conditionals) as well as in plain assignments.
 */
#define CHARACTER(ch) \
    ((PyBytesObject *)&(CHARACTERS[(ch)]))
/* Pointer to the statically allocated empty bytes singleton. */
#define EMPTY (&_Py_SINGLETON(bytes_empty))
// Return a borrowed reference to the empty bytes string singleton.
// The reference count is not incremented; use bytes_new_empty() when a
// strong reference is needed.
static inline PyObject* bytes_get_empty(void)
{
struct _Py_bytes_state *state = get_bytes_state();
// bytes_get_empty() must not be called before _PyBytes_Init()
// or after _PyBytes_Fini()
assert(state->empty_string != NULL);
return state->empty_string;
// NOTE(review): the statement below is unreachable as written.  It looks
// like the replacement body (static singleton) listed directly after the
// body it replaces (per-interpreter state) -- confirm which of the two
// returns is meant to remain.
return &EMPTY->ob_base.ob_base;
}
// Return a strong reference to the empty bytes string singleton:
// fetch the borrowed reference and take ownership of one new reference.
static inline PyObject* bytes_new_empty(void)
{
    PyObject *singleton = bytes_get_empty();

    Py_INCREF(singleton);
    return singleton;
}
/* Allocate the empty bytes object and store it in state->empty_string.
 *
 * Returns 0 on success, or -1 if PyObject_Malloc() fails (nothing is
 * stored in that case).  state->empty_string must still be NULL on entry.
 */
static int
bytes_create_empty_string_singleton(struct _Py_bytes_state *state)
{
// Create the empty bytes string singleton
PyBytesObject *op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE);
if (op == NULL) {
return -1;
}
// Initialize as a zero-length PyBytes_Type object with ob_shash set to -1.
_PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, 0);
op->ob_shash = -1;
// The payload consists solely of the terminating NUL byte.
op->ob_sval[0] = '\0';
assert(state->empty_string == NULL);
state->empty_string = (PyObject *)op;
return 0;
// NOTE(review): the two statements below are unreachable here.  They read
// like the body of a "return a strong reference to EMPTY" helper that ended
// up inside this function's braces -- confirm where they belong.
Py_INCREF(EMPTY);
return (PyObject *)EMPTY;
}
@ -148,12 +123,9 @@ PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
return NULL;
}
if (size == 1 && str != NULL) {
struct _Py_bytes_state *state = get_bytes_state();
op = state->characters[*str & UCHAR_MAX];
if (op != NULL) {
Py_INCREF(op);
return (PyObject *)op;
}
op = CHARACTER(*str & 255);
Py_INCREF(op);
return (PyObject *)op;
}
if (size == 0) {
return bytes_new_empty();
@ -166,12 +138,6 @@ PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
return (PyObject *) op;
memcpy(op->ob_sval, str, size);
/* share short strings */
if (size == 1) {
struct _Py_bytes_state *state = get_bytes_state();
Py_INCREF(op);
state->characters[*str & UCHAR_MAX] = op;
}
return (PyObject *) op;
}
@ -189,16 +155,13 @@ PyBytes_FromString(const char *str)
return NULL;
}
struct _Py_bytes_state *state = get_bytes_state();
if (size == 0) {
return bytes_new_empty();
}
else if (size == 1) {
op = state->characters[*str & UCHAR_MAX];
if (op != NULL) {
Py_INCREF(op);
return (PyObject *)op;
}
op = CHARACTER(*str & 255);
Py_INCREF(op);
return (PyObject *)op;
}
/* Inline PyObject_NewVar */
@ -209,12 +172,6 @@ PyBytes_FromString(const char *str)
_PyObject_InitVar((PyVarObject*)op, &PyBytes_Type, size);
op->ob_shash = -1;
memcpy(op->ob_sval, str, size+1);
/* share short strings */
if (size == 1) {
assert(state->characters[*str & UCHAR_MAX] == NULL);
Py_INCREF(op);
state->characters[*str & UCHAR_MAX] = op;
}
return (PyObject *) op;
}
@ -3086,17 +3043,6 @@ error:
}
/* Create the global bytes objects held in interp->bytes.
 *
 * Currently that is only the empty bytes singleton.  Returns
 * _PyStatus_NO_MEMORY() if creating it fails, _PyStatus_OK() otherwise.
 */
PyStatus
_PyBytes_InitGlobalObjects(PyInterpreterState *interp)
{
    if (bytes_create_empty_string_singleton(&interp->bytes) < 0) {
        return _PyStatus_NO_MEMORY();
    }

    return _PyStatus_OK();
}
PyStatus
_PyBytes_InitTypes(PyInterpreterState *interp)
{
@ -3116,16 +3062,6 @@ _PyBytes_InitTypes(PyInterpreterState *interp)
}
/* Drop the references held in interp->bytes: every entry of the
 * one-byte cache first, then the empty bytes singleton.  Py_CLEAR()
 * leaves each slot set to NULL.
 */
void
_PyBytes_Fini(PyInterpreterState *interp)
{
    struct _Py_bytes_state *bytes = &interp->bytes;

    for (int byte_value = 0; byte_value <= UCHAR_MAX; byte_value++) {
        Py_CLEAR(bytes->characters[byte_value]);
    }

    Py_CLEAR(bytes->empty_string);
}
/*********************** Bytes Iterator ****************************/
typedef struct {

View File

@ -678,11 +678,6 @@ pycore_init_global_objects(PyInterpreterState *interp)
_PyFloat_InitState(interp);
status = _PyBytes_InitGlobalObjects(interp);
if (_PyStatus_EXCEPTION(status)) {
return status;
}
status = _PyUnicode_InitGlobalObjects(interp);
if (_PyStatus_EXCEPTION(status)) {
return status;
@ -1685,7 +1680,6 @@ finalize_interp_types(PyInterpreterState *interp)
_PySlice_Fini(interp);
_PyBytes_Fini(interp);
_PyUnicode_Fini(interp);
_PyFloat_Fini(interp);
}