mirror of https://github.com/python/cpython
gh-100227: Move the Dict of Interned Strings to PyInterpreterState (gh-102339)
We can revisit the options for keeping the dict global later, if desired. For now, keeping it global seems quite complex, so we've gone with the simpler per-interpreter isolation in the meantime. https://github.com/python/cpython/issues/100227
This commit is contained in:
parent
7703def37e
commit
ba65a065cf
|
@ -23,13 +23,6 @@ extern "C" {
|
|||
// Only immutable objects should be considered runtime-global.
|
||||
// All others must be per-interpreter.
|
||||
|
||||
#define _Py_CACHED_OBJECT(NAME) \
|
||||
_PyRuntime.cached_objects.NAME
|
||||
|
||||
struct _Py_cached_objects {
|
||||
PyObject *interned_strings;
|
||||
};
|
||||
|
||||
#define _Py_GLOBAL_OBJECT(NAME) \
|
||||
_PyRuntime.static_objects.NAME
|
||||
#define _Py_SINGLETON(NAME) \
|
||||
|
@ -65,6 +58,8 @@ struct _Py_static_objects {
|
|||
(interp)->cached_objects.NAME
|
||||
|
||||
struct _Py_interp_cached_objects {
|
||||
PyObject *interned_strings;
|
||||
|
||||
/* AST */
|
||||
PyObject *str_replace_inf;
|
||||
|
||||
|
|
|
@ -163,7 +163,6 @@ typedef struct pyruntimestate {
|
|||
} types;
|
||||
|
||||
/* All the objects that are shared by the runtime's interpreters. */
|
||||
struct _Py_cached_objects cached_objects;
|
||||
struct _Py_static_objects static_objects;
|
||||
|
||||
/* The following fields are here to avoid allocation during init.
|
||||
|
|
|
@ -59,6 +59,7 @@ struct _Py_unicode_state {
|
|||
struct _Py_unicode_ids ids;
|
||||
};
|
||||
|
||||
extern void _PyUnicode_InternInPlace(PyInterpreterState *interp, PyObject **p);
|
||||
extern void _PyUnicode_ClearInterned(PyInterpreterState *interp);
|
||||
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -231,14 +231,32 @@ static inline PyObject* unicode_new_empty(void)
|
|||
Another way to look at this is to say that the actual reference
|
||||
count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
|
||||
*/
|
||||
static inline PyObject *get_interned_dict(void)
|
||||
static inline PyObject *get_interned_dict(PyInterpreterState *interp)
|
||||
{
|
||||
return _Py_CACHED_OBJECT(interned_strings);
|
||||
return _Py_INTERP_CACHED_OBJECT(interp, interned_strings);
|
||||
}
|
||||
|
||||
static inline void set_interned_dict(PyObject *dict)
|
||||
static int
|
||||
init_interned_dict(PyInterpreterState *interp)
|
||||
{
|
||||
_Py_CACHED_OBJECT(interned_strings) = dict;
|
||||
assert(get_interned_dict(interp) == NULL);
|
||||
PyObject *interned = interned = PyDict_New();
|
||||
if (interned == NULL) {
|
||||
return -1;
|
||||
}
|
||||
_Py_INTERP_CACHED_OBJECT(interp, interned_strings) = interned;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
clear_interned_dict(PyInterpreterState *interp)
|
||||
{
|
||||
PyObject *interned = get_interned_dict(interp);
|
||||
if (interned != NULL) {
|
||||
PyDict_Clear(interned);
|
||||
Py_DECREF(interned);
|
||||
_Py_INTERP_CACHED_OBJECT(interp, interned_strings) = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
#define _Py_RETURN_UNICODE_EMPTY() \
|
||||
|
@ -1520,12 +1538,12 @@ find_maxchar_surrogates(const wchar_t *begin, const wchar_t *end,
|
|||
static void
|
||||
unicode_dealloc(PyObject *unicode)
|
||||
{
|
||||
PyInterpreterState *interp = _PyInterpreterState_GET();
|
||||
#ifdef Py_DEBUG
|
||||
if (!unicode_is_finalizing() && unicode_is_singleton(unicode)) {
|
||||
_Py_FatalRefcountError("deallocating an Unicode singleton");
|
||||
}
|
||||
#endif
|
||||
PyObject *interned = get_interned_dict();
|
||||
if (PyUnicode_CHECK_INTERNED(unicode)) {
|
||||
/* Revive the dead object temporarily. PyDict_DelItem() removes two
|
||||
references (key and value) which were ignored by
|
||||
|
@ -1534,6 +1552,8 @@ unicode_dealloc(PyObject *unicode)
|
|||
PyDict_DelItem(). */
|
||||
assert(Py_REFCNT(unicode) == 0);
|
||||
Py_SET_REFCNT(unicode, 3);
|
||||
PyObject *interned = get_interned_dict(interp);
|
||||
assert(interned != NULL);
|
||||
if (PyDict_DelItem(interned, unicode) != 0) {
|
||||
_PyErr_WriteUnraisableMsg("deletion of interned string failed",
|
||||
NULL);
|
||||
|
@ -14529,34 +14549,29 @@ _PyUnicode_InitState(PyInterpreterState *interp)
|
|||
PyStatus
|
||||
_PyUnicode_InitGlobalObjects(PyInterpreterState *interp)
|
||||
{
|
||||
if (!_Py_IsMainInterpreter(interp)) {
|
||||
return _PyStatus_OK();
|
||||
}
|
||||
|
||||
// Initialize the global interned dict
|
||||
PyObject *interned = PyDict_New();
|
||||
if (interned == NULL) {
|
||||
if (init_interned_dict(interp)) {
|
||||
PyErr_Clear();
|
||||
return _PyStatus_ERR("failed to create interned dict");
|
||||
}
|
||||
|
||||
set_interned_dict(interned);
|
||||
|
||||
/* Intern statically allocated string identifiers and deepfreeze strings.
|
||||
* This must be done before any module initialization so that statically
|
||||
* allocated string identifiers are used instead of heap allocated strings.
|
||||
* Deepfreeze uses the interned identifiers if present to save space
|
||||
* else generates them and they are interned to speed up dict lookups.
|
||||
*/
|
||||
_PyUnicode_InitStaticStrings();
|
||||
if (_Py_IsMainInterpreter(interp)) {
|
||||
/* Intern statically allocated string identifiers and deepfreeze strings.
|
||||
* This must be done before any module initialization so that statically
|
||||
* allocated string identifiers are used instead of heap allocated strings.
|
||||
* Deepfreeze uses the interned identifiers if present to save space
|
||||
* else generates them and they are interned to speed up dict lookups.
|
||||
*/
|
||||
_PyUnicode_InitStaticStrings(interp);
|
||||
|
||||
#ifdef Py_DEBUG
|
||||
assert(_PyUnicode_CheckConsistency(&_Py_STR(empty), 1));
|
||||
assert(_PyUnicode_CheckConsistency(&_Py_STR(empty), 1));
|
||||
|
||||
for (int i = 0; i < 256; i++) {
|
||||
assert(_PyUnicode_CheckConsistency(LATIN1(i), 1));
|
||||
}
|
||||
for (int i = 0; i < 256; i++) {
|
||||
assert(_PyUnicode_CheckConsistency(LATIN1(i), 1));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
return _PyStatus_OK();
|
||||
}
|
||||
|
@ -14586,7 +14601,7 @@ error:
|
|||
|
||||
|
||||
void
|
||||
PyUnicode_InternInPlace(PyObject **p)
|
||||
_PyUnicode_InternInPlace(PyInterpreterState *interp, PyObject **p)
|
||||
{
|
||||
PyObject *s = *p;
|
||||
#ifdef Py_DEBUG
|
||||
|
@ -14608,7 +14623,7 @@ PyUnicode_InternInPlace(PyObject **p)
|
|||
return;
|
||||
}
|
||||
|
||||
PyObject *interned = get_interned_dict();
|
||||
PyObject *interned = get_interned_dict(interp);
|
||||
assert(interned != NULL);
|
||||
|
||||
PyObject *t = PyDict_SetDefault(interned, s, s);
|
||||
|
@ -14629,6 +14644,13 @@ PyUnicode_InternInPlace(PyObject **p)
|
|||
_PyUnicode_STATE(s).interned = 1;
|
||||
}
|
||||
|
||||
void
|
||||
PyUnicode_InternInPlace(PyObject **p)
|
||||
{
|
||||
PyInterpreterState *interp = _PyInterpreterState_GET();
|
||||
_PyUnicode_InternInPlace(interp, p);
|
||||
}
|
||||
|
||||
// Function kept for the stable ABI.
|
||||
PyAPI_FUNC(void) PyUnicode_InternImmortal(PyObject **);
|
||||
void
|
||||
|
@ -14653,12 +14675,7 @@ PyUnicode_InternFromString(const char *cp)
|
|||
void
|
||||
_PyUnicode_ClearInterned(PyInterpreterState *interp)
|
||||
{
|
||||
if (!_Py_IsMainInterpreter(interp)) {
|
||||
// interned dict is shared by all interpreters
|
||||
return;
|
||||
}
|
||||
|
||||
PyObject *interned = get_interned_dict();
|
||||
PyObject *interned = get_interned_dict(interp);
|
||||
if (interned == NULL) {
|
||||
return;
|
||||
}
|
||||
|
@ -14693,9 +14710,7 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
|
|||
total_length);
|
||||
#endif
|
||||
|
||||
PyDict_Clear(interned);
|
||||
Py_DECREF(interned);
|
||||
set_interned_dict(NULL);
|
||||
clear_interned_dict(interp);
|
||||
}
|
||||
|
||||
|
||||
|
@ -15108,7 +15123,7 @@ _PyUnicode_EnableLegacyWindowsFSEncoding(void)
|
|||
static inline int
|
||||
unicode_is_finalizing(void)
|
||||
{
|
||||
return (get_interned_dict() == NULL);
|
||||
return (get_interned_dict(_PyInterpreterState_Main()) == NULL);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -15131,14 +15146,13 @@ _PyUnicode_Fini(PyInterpreterState *interp)
|
|||
{
|
||||
struct _Py_unicode_state *state = &interp->unicode;
|
||||
|
||||
if (_Py_IsMainInterpreter(interp)) {
|
||||
// _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()
|
||||
assert(get_interned_dict() == NULL);
|
||||
// bpo-47182: force a unicodedata CAPI capsule re-import on
|
||||
// subsequent initialization of main interpreter.
|
||||
}
|
||||
// _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()
|
||||
assert(get_interned_dict(interp) == NULL);
|
||||
|
||||
_PyUnicode_FiniEncodings(&state->fs_codec);
|
||||
|
||||
// bpo-47182: force a unicodedata CAPI capsule re-import on
|
||||
// subsequent initialization of interpreter.
|
||||
interp->unicode.ucnhash_capi = NULL;
|
||||
|
||||
unicode_clear_identifiers(state);
|
||||
|
|
|
@ -354,14 +354,14 @@ def generate_static_strings_initializer(identifiers, strings):
|
|||
printer.write(before)
|
||||
printer.write(START)
|
||||
printer.write("static inline void")
|
||||
with printer.block("_PyUnicode_InitStaticStrings(void)"):
|
||||
with printer.block("_PyUnicode_InitStaticStrings(PyInterpreterState *interp)"):
|
||||
printer.write(f'PyObject *string;')
|
||||
for i in sorted(identifiers):
|
||||
# This use of _Py_ID() is ignored by iter_global_strings()
|
||||
# since iter_files() ignores .h files.
|
||||
printer.write(f'string = &_Py_ID({i});')
|
||||
printer.write(f'assert(_PyUnicode_CheckConsistency(string, 1));')
|
||||
printer.write(f'PyUnicode_InternInPlace(&string);')
|
||||
printer.write(f'_PyUnicode_InternInPlace(interp, &string);')
|
||||
# XXX What about "strings"?
|
||||
printer.write(END)
|
||||
printer.write(after)
|
||||
|
|
Loading…
Reference in New Issue