From 442ad74fc2928b095760eb89aba93c28eab17f9b Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 2 Apr 2021 15:28:13 +0200 Subject: [PATCH] bpo-43687: Py_Initialize() creates singletons earlier (GH-25147) Reorganize pycore_interp_init() to initialize singletons before the first PyType_Ready() call. Fix an issue when Python is configured using --without-doc-strings. --- Include/internal/pycore_long.h | 4 +- Include/internal/pycore_pylifecycle.h | 7 +- Objects/floatobject.c | 10 +- Objects/longobject.c | 19 ++-- Objects/structseq.c | 3 +- Objects/unicodeobject.c | 52 +++++----- Python/errors.c | 2 +- Python/pylifecycle.c | 136 ++++++++++++++++---------- 8 files changed, 138 insertions(+), 95 deletions(-) diff --git a/Include/internal/pycore_long.h b/Include/internal/pycore_long.h index a785b23a92b..2bea3a55ec8 100644 --- a/Include/internal/pycore_long.h +++ b/Include/internal/pycore_long.h @@ -18,8 +18,8 @@ static inline PyObject* __PyLong_GetSmallInt_internal(int value) assert(-_PY_NSMALLNEGINTS <= value && value < _PY_NSMALLPOSINTS); size_t index = _PY_NSMALLNEGINTS + value; PyObject *obj = (PyObject*)interp->small_ints[index]; - // _PyLong_GetZero() and _PyLong_GetOne() must not be called - // before _PyLong_Init() nor after _PyLong_Fini() + // _PyLong_GetZero(), _PyLong_GetOne() and get_small_int() must not be + // called before _PyLong_Init() nor after _PyLong_Fini(). 
assert(obj != NULL); return obj; } diff --git a/Include/internal/pycore_pylifecycle.h b/Include/internal/pycore_pylifecycle.h index 7ae107d73da..524be9d4cbb 100644 --- a/Include/internal/pycore_pylifecycle.h +++ b/Include/internal/pycore_pylifecycle.h @@ -50,9 +50,11 @@ PyAPI_FUNC(int) _Py_IsLocaleCoercionTarget(const char *ctype_loc); /* Various one-time initializers */ extern PyStatus _PyUnicode_Init(PyInterpreterState *interp); +extern PyStatus _PyUnicode_InitTypes(void); extern PyStatus _PyBytes_Init(PyInterpreterState *interp); extern int _PyStructSequence_Init(void); extern int _PyLong_Init(PyInterpreterState *interp); +extern int _PyLong_InitTypes(void); extern PyStatus _PyTuple_Init(PyInterpreterState *interp); extern PyStatus _PyFaulthandler_Init(int enable); extern int _PyTraceMalloc_Init(int enable); @@ -64,9 +66,10 @@ extern PyStatus _PySys_ReadPreinitWarnOptions(PyWideStringList *options); extern PyStatus _PySys_ReadPreinitXOptions(PyConfig *config); extern int _PySys_UpdateConfig(PyThreadState *tstate); extern PyStatus _PyExc_Init(PyInterpreterState *interp); -extern PyStatus _PyErr_Init(void); +extern PyStatus _PyErr_InitTypes(void); extern PyStatus _PyBuiltins_AddExceptions(PyObject * bltinmod); -extern int _PyFloat_Init(void); +extern void _PyFloat_Init(void); +extern int _PyFloat_InitTypes(void); extern PyStatus _Py_HashRandomization_Init(const PyConfig *); extern PyStatus _PyTypes_Init(void); diff --git a/Objects/floatobject.c b/Objects/floatobject.c index 178f7b2f8d2..b3c41b1ca05 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -1968,7 +1968,7 @@ PyTypeObject PyFloat_Type = { .tp_vectorcall = (vectorcallfunc)float_vectorcall, }; -int +void _PyFloat_Init(void) { /* We attempt to determine if this machine is using IEEE @@ -2016,14 +2016,18 @@ _PyFloat_Init(void) double_format = detected_double_format; float_format = detected_float_format; +} +int +_PyFloat_InitTypes(void) +{ /* Init float info */ if (FloatInfoType.tp_name == NULL) 
{ if (PyStructSequence_InitType2(&FloatInfoType, &floatinfo_desc) < 0) { - return 0; + return -1; } } - return 1; + return 0; } void diff --git a/Objects/longobject.c b/Objects/longobject.c index d5037a79b9d..e1c1191e648 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -5719,17 +5719,20 @@ _PyLong_Init(PyInterpreterState *interp) interp->small_ints[i] = v; } + return 0; +} - if (_Py_IsMainInterpreter(interp)) { - /* initialize int_info */ - if (Int_InfoType.tp_name == NULL) { - if (PyStructSequence_InitType2(&Int_InfoType, &int_info_desc) < 0) { - return 0; - } + +int +_PyLong_InitTypes(void) +{ + /* initialize int_info */ + if (Int_InfoType.tp_name == NULL) { + if (PyStructSequence_InitType2(&Int_InfoType, &int_info_desc) < 0) { + return -1; } } - - return 1; + return 0; } void diff --git a/Objects/structseq.c b/Objects/structseq.c index 8a92bdbec08..88e63b658a4 100644 --- a/Objects/structseq.c +++ b/Objects/structseq.c @@ -579,7 +579,8 @@ int _PyStructSequence_Init(void) if (_PyUnicode_FromId(&PyId_n_sequence_fields) == NULL || _PyUnicode_FromId(&PyId_n_fields) == NULL || _PyUnicode_FromId(&PyId_n_unnamed_fields) == NULL) + { return -1; - + } return 0; } diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index f6bf505b7fc..74c5888d13b 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -15676,18 +15676,6 @@ PyTypeObject PyUnicode_Type = { PyStatus _PyUnicode_Init(PyInterpreterState *interp) { - /* XXX - move this array to unicodectype.c ? 
*/ - const Py_UCS2 linebreak[] = { - 0x000A, /* LINE FEED */ - 0x000D, /* CARRIAGE RETURN */ - 0x001C, /* FILE SEPARATOR */ - 0x001D, /* GROUP SEPARATOR */ - 0x001E, /* RECORD SEPARATOR */ - 0x0085, /* NEXT LINE */ - 0x2028, /* LINE SEPARATOR */ - 0x2029, /* PARAGRAPH SEPARATOR */ - }; - struct _Py_unicode_state *state = &interp->unicode; if (unicode_create_empty_string_singleton(state) < 0) { return _PyStatus_NO_MEMORY(); @@ -15695,23 +15683,39 @@ _PyUnicode_Init(PyInterpreterState *interp) if (_Py_IsMainInterpreter(interp)) { /* initialize the linebreak bloom filter */ + const Py_UCS2 linebreak[] = { + 0x000A, /* LINE FEED */ + 0x000D, /* CARRIAGE RETURN */ + 0x001C, /* FILE SEPARATOR */ + 0x001D, /* GROUP SEPARATOR */ + 0x001E, /* RECORD SEPARATOR */ + 0x0085, /* NEXT LINE */ + 0x2028, /* LINE SEPARATOR */ + 0x2029, /* PARAGRAPH SEPARATOR */ + }; bloom_linebreak = make_bloom_mask( PyUnicode_2BYTE_KIND, linebreak, Py_ARRAY_LENGTH(linebreak)); + } - if (PyType_Ready(&PyUnicode_Type) < 0) { - return _PyStatus_ERR("Can't initialize unicode type"); - } + return _PyStatus_OK(); +} - if (PyType_Ready(&EncodingMapType) < 0) { - return _PyStatus_ERR("Can't initialize encoding map type"); - } - if (PyType_Ready(&PyFieldNameIter_Type) < 0) { - return _PyStatus_ERR("Can't initialize field name iterator type"); - } - if (PyType_Ready(&PyFormatterIter_Type) < 0) { - return _PyStatus_ERR("Can't initialize formatter iter type"); - } + +PyStatus +_PyUnicode_InitTypes(void) +{ + if (PyType_Ready(&PyUnicode_Type) < 0) { + return _PyStatus_ERR("Can't initialize unicode type"); + } + if (PyType_Ready(&EncodingMapType) < 0) { + return _PyStatus_ERR("Can't initialize encoding map type"); + } + if (PyType_Ready(&PyFieldNameIter_Type) < 0) { + return _PyStatus_ERR("Can't initialize field name iterator type"); + } + if (PyType_Ready(&PyFormatterIter_Type) < 0) { + return _PyStatus_ERR("Can't initialize formatter iter type"); } return _PyStatus_OK(); } diff --git a/Python/errors.c 
b/Python/errors.c index 9bac7ba70f5..d73ba93b02e 100644 --- a/Python/errors.c +++ b/Python/errors.c @@ -1192,7 +1192,7 @@ static PyStructSequence_Desc UnraisableHookArgs_desc = { PyStatus -_PyErr_Init(void) +_PyErr_InitTypes(void) { if (UnraisableHookArgsType.tp_name == NULL) { if (PyStructSequence_InitType2(&UnraisableHookArgsType, diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 8309477806f..64723ce82d7 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -628,38 +628,16 @@ pycore_create_interpreter(_PyRuntimeState *runtime, static PyStatus -pycore_init_types(PyInterpreterState *interp) +pycore_init_singletons(PyInterpreterState *interp) { PyStatus status; - int is_main_interp = _Py_IsMainInterpreter(interp); - status = _PyGC_Init(interp); - if (_PyStatus_EXCEPTION(status)) { - return status; - } - - // Create the empty tuple singleton. It must be created before the first - // PyType_Ready() call since PyType_Ready() creates tuples, for tp_bases - // for example. 
- status = _PyTuple_Init(interp); - if (_PyStatus_EXCEPTION(status)) { - return status; - } - - if (is_main_interp) { - status = _PyTypes_Init(); - if (_PyStatus_EXCEPTION(status)) { - return status; - } - } - - if (!_PyLong_Init(interp)) { + if (_PyLong_Init(interp) < 0) { return _PyStatus_ERR("can't init longs"); } - status = _PyUnicode_Init(interp); - if (_PyStatus_EXCEPTION(status)) { - return status; + if (_Py_IsMainInterpreter(interp)) { + _PyFloat_Init(); } status = _PyBytes_Init(interp); @@ -667,22 +645,58 @@ pycore_init_types(PyInterpreterState *interp) return status; } + status = _PyUnicode_Init(interp); + if (_PyStatus_EXCEPTION(status)) { + return status; + } + + status = _PyTuple_Init(interp); + if (_PyStatus_EXCEPTION(status)) { + return status; + } + + return _PyStatus_OK(); +} + + +static PyStatus +pycore_init_types(PyInterpreterState *interp) +{ + PyStatus status; + int is_main_interp = _Py_IsMainInterpreter(interp); + + if (is_main_interp) { + if (_PyStructSequence_Init() < 0) { + return _PyStatus_ERR("can't initialize structseq"); + } + + status = _PyTypes_Init(); + if (_PyStatus_EXCEPTION(status)) { + return status; + } + + if (_PyLong_InitTypes() < 0) { + return _PyStatus_ERR("can't init int type"); + } + + status = _PyUnicode_InitTypes(); + if (_PyStatus_EXCEPTION(status)) { + return status; + } + } + + if (is_main_interp) { + if (_PyFloat_InitTypes() < 0) { + return _PyStatus_ERR("can't init float"); + } + } + status = _PyExc_Init(interp); if (_PyStatus_EXCEPTION(status)) { return status; } - if (is_main_interp) { - if (!_PyFloat_Init()) { - return _PyStatus_ERR("can't init float"); - } - - if (_PyStructSequence_Init() < 0) { - return _PyStatus_ERR("can't initialize structseq"); - } - } - - status = _PyErr_Init(); + status = _PyErr_InitTypes(); if (_PyStatus_EXCEPTION(status)) { return status; } @@ -693,22 +707,15 @@ pycore_init_types(PyInterpreterState *interp) } } - if (_PyWarnings_InitState(interp) < 0) { - return _PyStatus_ERR("can't 
initialize warnings"); - } - - status = _PyAtExit_Init(interp); - if (_PyStatus_EXCEPTION(status)) { - return status; - } - return _PyStatus_OK(); } static PyStatus -pycore_init_builtins(PyInterpreterState *interp) +pycore_init_builtins(PyThreadState *tstate) { + PyInterpreterState *interp = tstate->interp; + PyObject *bimod = _PyBuiltin_Init(interp); if (bimod == NULL) { goto error; @@ -744,6 +751,7 @@ pycore_init_builtins(PyInterpreterState *interp) } interp->import_func = Py_NewRef(import_func); + assert(!_PyErr_Occurred(tstate)); return _PyStatus_OK(); error: @@ -755,29 +763,49 @@ error: static PyStatus pycore_interp_init(PyThreadState *tstate) { + PyInterpreterState *interp = tstate->interp; PyStatus status; PyObject *sysmod = NULL; - status = pycore_init_types(tstate->interp); + // Create singletons before the first PyType_Ready() call, since + // PyType_Ready() uses singletons like the Unicode empty string (tp_doc) + // and the empty tuple singletons (tp_bases). + status = pycore_init_singletons(interp); + if (_PyStatus_EXCEPTION(status)) { + return status; + } + + // The GC must be initialized before the first GC collection. 
+ status = _PyGC_Init(interp); + if (_PyStatus_EXCEPTION(status)) { + return status; + } + + status = pycore_init_types(interp); if (_PyStatus_EXCEPTION(status)) { goto done; } + if (_PyWarnings_InitState(interp) < 0) { + return _PyStatus_ERR("can't initialize warnings"); + } + + status = _PyAtExit_Init(interp); + if (_PyStatus_EXCEPTION(status)) { + return status; + } + status = _PySys_Create(tstate, &sysmod); if (_PyStatus_EXCEPTION(status)) { goto done; } - assert(!_PyErr_Occurred(tstate)); - - status = pycore_init_builtins(tstate->interp); + status = pycore_init_builtins(tstate); if (_PyStatus_EXCEPTION(status)) { goto done; } - assert(!_PyErr_Occurred(tstate)); - - const PyConfig *config = _PyInterpreterState_GetConfig(tstate->interp); + const PyConfig *config = _PyInterpreterState_GetConfig(interp); if (config->_install_importlib) { /* This call sets up builtin and frozen import support */ if (init_importlib(tstate, sysmod) < 0) {