bpo-40521: Per-interpreter interned strings (GH-20085)
Make the Unicode dictionary of interned strings compatible with subinterpreters. Remove the INTERN_NAME_STRINGS macro in typeobject.c: names are always now interned (even if EXPERIMENTAL_ISOLATED_SUBINTERPRETERS macro is defined). _PyUnicode_ClearInterned() now uses PyDict_Next() to no longer allocate memory, to ensure that the interned dictionary is cleared.
This commit is contained in:
parent
993e88cf08
commit
ea251806b8
|
@ -76,6 +76,17 @@ struct _Py_unicode_state {
|
|||
shared as well. */
|
||||
PyObject *latin1[256];
|
||||
struct _Py_unicode_fs_codec fs_codec;
|
||||
|
||||
/* This dictionary holds all interned unicode strings. Note that references
|
||||
to strings in this dictionary are *not* counted in the string's ob_refcnt.
|
||||
When the interned string reaches a refcnt of 0 the string deallocation
|
||||
function will delete the reference from this dictionary.
|
||||
|
||||
Another way to look at this is that to say that the actual reference
|
||||
count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
|
||||
*/
|
||||
PyObject *interned;
|
||||
|
||||
// Unicode identifiers (_Py_Identifier): see _PyUnicode_FromId()
|
||||
struct _Py_unicode_ids ids;
|
||||
};
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
Make the Unicode dictionary of interned strings compatible with
|
||||
subinterpreters. Patch by Victor Stinner.
|
|
@ -48,11 +48,6 @@ typedef struct PySlot_Offset {
|
|||
} PySlot_Offset;
|
||||
|
||||
|
||||
/* bpo-40521: Interned strings are shared by all subinterpreters */
|
||||
#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
|
||||
# define INTERN_NAME_STRINGS
|
||||
#endif
|
||||
|
||||
/* alphabetical order */
|
||||
_Py_IDENTIFIER(__abstractmethods__);
|
||||
_Py_IDENTIFIER(__class__);
|
||||
|
@ -3527,7 +3522,6 @@ type_setattro(PyTypeObject *type, PyObject *name, PyObject *value)
|
|||
if (name == NULL)
|
||||
return -1;
|
||||
}
|
||||
#ifdef INTERN_NAME_STRINGS
|
||||
if (!PyUnicode_CHECK_INTERNED(name)) {
|
||||
PyUnicode_InternInPlace(&name);
|
||||
if (!PyUnicode_CHECK_INTERNED(name)) {
|
||||
|
@ -3537,7 +3531,6 @@ type_setattro(PyTypeObject *type, PyObject *name, PyObject *value)
|
|||
return -1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
else {
|
||||
/* Will fail in _PyObject_GenericSetAttrWithDict. */
|
||||
|
@ -7683,17 +7676,10 @@ _PyTypes_InitSlotDefs(void)
|
|||
for (slotdef *p = slotdefs; p->name; p++) {
|
||||
/* Slots must be ordered by their offset in the PyHeapTypeObject. */
|
||||
assert(!p[1].name || p->offset <= p[1].offset);
|
||||
#ifdef INTERN_NAME_STRINGS
|
||||
p->name_strobj = PyUnicode_InternFromString(p->name);
|
||||
if (!p->name_strobj || !PyUnicode_CHECK_INTERNED(p->name_strobj)) {
|
||||
return _PyStatus_NO_MEMORY();
|
||||
}
|
||||
#else
|
||||
p->name_strobj = PyUnicode_FromString(p->name);
|
||||
if (!p->name_strobj) {
|
||||
return _PyStatus_NO_MEMORY();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
slotdefs_initialized = 1;
|
||||
return _PyStatus_OK();
|
||||
|
@ -7718,24 +7704,16 @@ update_slot(PyTypeObject *type, PyObject *name)
|
|||
int offset;
|
||||
|
||||
assert(PyUnicode_CheckExact(name));
|
||||
#ifdef INTERN_NAME_STRINGS
|
||||
assert(PyUnicode_CHECK_INTERNED(name));
|
||||
#endif
|
||||
|
||||
assert(slotdefs_initialized);
|
||||
pp = ptrs;
|
||||
for (p = slotdefs; p->name; p++) {
|
||||
assert(PyUnicode_CheckExact(p->name_strobj));
|
||||
assert(PyUnicode_CheckExact(name));
|
||||
#ifdef INTERN_NAME_STRINGS
|
||||
if (p->name_strobj == name) {
|
||||
*pp++ = p;
|
||||
}
|
||||
#else
|
||||
if (p->name_strobj == name || _PyUnicode_EQ(p->name_strobj, name)) {
|
||||
*pp++ = p;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
*pp = NULL;
|
||||
for (pp = ptrs; *pp; pp++) {
|
||||
|
|
|
@ -206,22 +206,6 @@ extern "C" {
|
|||
# define OVERALLOCATE_FACTOR 4
|
||||
#endif
|
||||
|
||||
/* bpo-40521: Interned strings are shared by all interpreters. */
|
||||
#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
|
||||
# define INTERNED_STRINGS
|
||||
#endif
|
||||
|
||||
/* This dictionary holds all interned unicode strings. Note that references
|
||||
to strings in this dictionary are *not* counted in the string's ob_refcnt.
|
||||
When the interned string reaches a refcnt of 0 the string deallocation
|
||||
function will delete the reference from this dictionary.
|
||||
|
||||
Another way to look at this is that to say that the actual reference
|
||||
count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
|
||||
*/
|
||||
#ifdef INTERNED_STRINGS
|
||||
static PyObject *interned = NULL;
|
||||
#endif
|
||||
|
||||
static struct _Py_unicode_state*
|
||||
get_unicode_state(void)
|
||||
|
@ -1946,7 +1930,8 @@ unicode_dealloc(PyObject *unicode)
|
|||
break;
|
||||
|
||||
case SSTATE_INTERNED_MORTAL:
|
||||
#ifdef INTERNED_STRINGS
|
||||
{
|
||||
struct _Py_unicode_state *state = get_unicode_state();
|
||||
/* Revive the dead object temporarily. PyDict_DelItem() removes two
|
||||
references (key and value) which were ignored by
|
||||
PyUnicode_InternInPlace(). Use refcnt=3 rather than refcnt=2
|
||||
|
@ -1954,14 +1939,14 @@ unicode_dealloc(PyObject *unicode)
|
|||
PyDict_DelItem(). */
|
||||
assert(Py_REFCNT(unicode) == 0);
|
||||
Py_SET_REFCNT(unicode, 3);
|
||||
if (PyDict_DelItem(interned, unicode) != 0) {
|
||||
if (PyDict_DelItem(state->interned, unicode) != 0) {
|
||||
_PyErr_WriteUnraisableMsg("deletion of interned string failed",
|
||||
NULL);
|
||||
}
|
||||
assert(Py_REFCNT(unicode) == 1);
|
||||
Py_SET_REFCNT(unicode, 0);
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
|
||||
case SSTATE_INTERNED_IMMORTAL:
|
||||
_PyObject_ASSERT_FAILED_MSG(unicode, "Immortal interned string died");
|
||||
|
@ -11536,12 +11521,11 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)
|
|||
if (PyUnicode_CHECK_INTERNED(left))
|
||||
return 0;
|
||||
|
||||
#ifdef INTERNED_STRINGS
|
||||
assert(_PyUnicode_HASH(right_uni) != -1);
|
||||
Py_hash_t hash = _PyUnicode_HASH(left);
|
||||
if (hash != -1 && hash != _PyUnicode_HASH(right_uni))
|
||||
if (hash != -1 && hash != _PyUnicode_HASH(right_uni)) {
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
return unicode_compare_eq(left, right_uni);
|
||||
}
|
||||
|
@ -15765,23 +15749,21 @@ PyUnicode_InternInPlace(PyObject **p)
|
|||
return;
|
||||
}
|
||||
|
||||
#ifdef INTERNED_STRINGS
|
||||
if (PyUnicode_READY(s) == -1) {
|
||||
PyErr_Clear();
|
||||
return;
|
||||
}
|
||||
|
||||
if (interned == NULL) {
|
||||
interned = PyDict_New();
|
||||
if (interned == NULL) {
|
||||
struct _Py_unicode_state *state = get_unicode_state();
|
||||
if (state->interned == NULL) {
|
||||
state->interned = PyDict_New();
|
||||
if (state->interned == NULL) {
|
||||
PyErr_Clear(); /* Don't leave an exception */
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
PyObject *t;
|
||||
t = PyDict_SetDefault(interned, s, s);
|
||||
|
||||
PyObject *t = PyDict_SetDefault(state->interned, s, s);
|
||||
if (t == NULL) {
|
||||
PyErr_Clear();
|
||||
return;
|
||||
|
@ -15798,13 +15780,9 @@ PyUnicode_InternInPlace(PyObject **p)
|
|||
this. */
|
||||
Py_SET_REFCNT(s, Py_REFCNT(s) - 2);
|
||||
_PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL;
|
||||
#else
|
||||
// PyDict expects that interned strings have their hash
|
||||
// (PyASCIIObject.hash) already computed.
|
||||
(void)unicode_hash(s);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
PyUnicode_InternImmortal(PyObject **p)
|
||||
{
|
||||
|
@ -15838,35 +15816,25 @@ PyUnicode_InternFromString(const char *cp)
|
|||
void
|
||||
_PyUnicode_ClearInterned(PyThreadState *tstate)
|
||||
{
|
||||
if (!_Py_IsMainInterpreter(tstate)) {
|
||||
// interned dict is shared by all interpreters
|
||||
struct _Py_unicode_state *state = &tstate->interp->unicode;
|
||||
if (state->interned == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (interned == NULL) {
|
||||
return;
|
||||
}
|
||||
assert(PyDict_CheckExact(interned));
|
||||
|
||||
PyObject *keys = PyDict_Keys(interned);
|
||||
if (keys == NULL) {
|
||||
PyErr_Clear();
|
||||
return;
|
||||
}
|
||||
assert(PyList_CheckExact(keys));
|
||||
assert(PyDict_CheckExact(state->interned));
|
||||
|
||||
/* Interned unicode strings are not forcibly deallocated; rather, we give
|
||||
them their stolen references back, and then clear and DECREF the
|
||||
interned dict. */
|
||||
|
||||
Py_ssize_t n = PyList_GET_SIZE(keys);
|
||||
#ifdef INTERNED_STATS
|
||||
fprintf(stderr, "releasing %zd interned strings\n", n);
|
||||
fprintf(stderr, "releasing %zd interned strings\n",
|
||||
PyDict_GET_SIZE(state->interned));
|
||||
|
||||
Py_ssize_t immortal_size = 0, mortal_size = 0;
|
||||
#endif
|
||||
for (Py_ssize_t i = 0; i < n; i++) {
|
||||
PyObject *s = PyList_GET_ITEM(keys, i);
|
||||
Py_ssize_t pos = 0;
|
||||
PyObject *s, *ignored_value;
|
||||
while (PyDict_Next(state->interned, &pos, &s, &ignored_value)) {
|
||||
assert(PyUnicode_IS_READY(s));
|
||||
|
||||
switch (PyUnicode_CHECK_INTERNED(s)) {
|
||||
|
@ -15896,10 +15864,9 @@ _PyUnicode_ClearInterned(PyThreadState *tstate)
|
|||
"total size of all interned strings: %zd/%zd mortal/immortal\n",
|
||||
mortal_size, immortal_size);
|
||||
#endif
|
||||
Py_DECREF(keys);
|
||||
|
||||
PyDict_Clear(interned);
|
||||
Py_CLEAR(interned);
|
||||
PyDict_Clear(state->interned);
|
||||
Py_CLEAR(state->interned);
|
||||
}
|
||||
|
||||
|
||||
|
@ -16269,19 +16236,19 @@ _PyUnicode_EnableLegacyWindowsFSEncoding(void)
|
|||
void
|
||||
_PyUnicode_Fini(PyThreadState *tstate)
|
||||
{
|
||||
// _PyUnicode_ClearInterned() must be called before
|
||||
|
||||
struct _Py_unicode_state *state = &tstate->interp->unicode;
|
||||
|
||||
Py_CLEAR(state->empty_string);
|
||||
// _PyUnicode_ClearInterned() must be called before
|
||||
assert(state->interned == NULL);
|
||||
|
||||
_PyUnicode_FiniEncodings(&state->fs_codec);
|
||||
|
||||
unicode_clear_identifiers(tstate);
|
||||
|
||||
for (Py_ssize_t i = 0; i < 256; i++) {
|
||||
Py_CLEAR(state->latin1[i]);
|
||||
}
|
||||
|
||||
unicode_clear_identifiers(tstate);
|
||||
|
||||
_PyUnicode_FiniEncodings(&tstate->interp->unicode.fs_codec);
|
||||
Py_CLEAR(state->empty_string);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -1573,6 +1573,8 @@ finalize_interp_types(PyThreadState *tstate)
|
|||
_PyFrame_Fini(tstate);
|
||||
_PyAsyncGen_Fini(tstate);
|
||||
_PyContext_Fini(tstate);
|
||||
// Call _PyUnicode_ClearInterned() before _PyDict_Fini() since it uses
|
||||
// a dict internally.
|
||||
_PyUnicode_ClearInterned(tstate);
|
||||
|
||||
_PyDict_Fini(tstate);
|
||||
|
|
Loading…
Reference in New Issue