bpo-40521: Disable Unicode caches in isolated subinterpreters (GH-19933)
When Python is built in the experimental isolated subinterpreters mode, disable Unicode singletons and Unicode interned strings, since they are shared by all interpreters. This is a temporary workaround until these caches are made per-interpreter.
This commit is contained in:
parent
299b8c61e9
commit
607b1027fe
|
@ -56,6 +56,11 @@ static size_t method_cache_misses = 0;
|
||||||
static size_t method_cache_collisions = 0;
|
static size_t method_cache_collisions = 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* bpo-40521: Interned strings are shared by all subinterpreters */
|
||||||
|
#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
|
||||||
|
# define INTERN_NAME_STRINGS
|
||||||
|
#endif
|
||||||
|
|
||||||
/* alphabetical order */
|
/* alphabetical order */
|
||||||
_Py_IDENTIFIER(__abstractmethods__);
|
_Py_IDENTIFIER(__abstractmethods__);
|
||||||
_Py_IDENTIFIER(__class__);
|
_Py_IDENTIFIER(__class__);
|
||||||
|
@ -3418,6 +3423,7 @@ type_setattro(PyTypeObject *type, PyObject *name, PyObject *value)
|
||||||
if (name == NULL)
|
if (name == NULL)
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
#ifdef INTERN_NAME_STRINGS
|
||||||
if (!PyUnicode_CHECK_INTERNED(name)) {
|
if (!PyUnicode_CHECK_INTERNED(name)) {
|
||||||
PyUnicode_InternInPlace(&name);
|
PyUnicode_InternInPlace(&name);
|
||||||
if (!PyUnicode_CHECK_INTERNED(name)) {
|
if (!PyUnicode_CHECK_INTERNED(name)) {
|
||||||
|
@ -3427,6 +3433,7 @@ type_setattro(PyTypeObject *type, PyObject *name, PyObject *value)
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
/* Will fail in _PyObject_GenericSetAttrWithDict. */
|
/* Will fail in _PyObject_GenericSetAttrWithDict. */
|
||||||
|
@ -7531,10 +7538,17 @@ _PyTypes_InitSlotDefs(void)
|
||||||
for (slotdef *p = slotdefs; p->name; p++) {
|
for (slotdef *p = slotdefs; p->name; p++) {
|
||||||
/* Slots must be ordered by their offset in the PyHeapTypeObject. */
|
/* Slots must be ordered by their offset in the PyHeapTypeObject. */
|
||||||
assert(!p[1].name || p->offset <= p[1].offset);
|
assert(!p[1].name || p->offset <= p[1].offset);
|
||||||
|
#ifdef INTERN_NAME_STRINGS
|
||||||
p->name_strobj = PyUnicode_InternFromString(p->name);
|
p->name_strobj = PyUnicode_InternFromString(p->name);
|
||||||
if (!p->name_strobj || !PyUnicode_CHECK_INTERNED(p->name_strobj)) {
|
if (!p->name_strobj || !PyUnicode_CHECK_INTERNED(p->name_strobj)) {
|
||||||
return _PyStatus_NO_MEMORY();
|
return _PyStatus_NO_MEMORY();
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
p->name_strobj = PyUnicode_FromString(p->name);
|
||||||
|
if (!p->name_strobj) {
|
||||||
|
return _PyStatus_NO_MEMORY();
|
||||||
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
slotdefs_initialized = 1;
|
slotdefs_initialized = 1;
|
||||||
return _PyStatus_OK();
|
return _PyStatus_OK();
|
||||||
|
@ -7559,7 +7573,9 @@ update_slot(PyTypeObject *type, PyObject *name)
|
||||||
int offset;
|
int offset;
|
||||||
|
|
||||||
assert(PyUnicode_CheckExact(name));
|
assert(PyUnicode_CheckExact(name));
|
||||||
|
#ifdef INTERN_NAME_STRINGS
|
||||||
assert(PyUnicode_CHECK_INTERNED(name));
|
assert(PyUnicode_CHECK_INTERNED(name));
|
||||||
|
#endif
|
||||||
|
|
||||||
assert(slotdefs_initialized);
|
assert(slotdefs_initialized);
|
||||||
pp = ptrs;
|
pp = ptrs;
|
||||||
|
|
|
@ -198,6 +198,11 @@ extern "C" {
|
||||||
# define OVERALLOCATE_FACTOR 4
|
# define OVERALLOCATE_FACTOR 4
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* bpo-40521: Interned strings are shared by all interpreters. */
|
||||||
|
#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
|
||||||
|
# define INTERNED_STRINGS
|
||||||
|
#endif
|
||||||
|
|
||||||
/* This dictionary holds all interned unicode strings. Note that references
|
/* This dictionary holds all interned unicode strings. Note that references
|
||||||
to strings in this dictionary are *not* counted in the string's ob_refcnt.
|
to strings in this dictionary are *not* counted in the string's ob_refcnt.
|
||||||
When the interned string reaches a refcnt of 0 the string deallocation
|
When the interned string reaches a refcnt of 0 the string deallocation
|
||||||
|
@ -206,7 +211,9 @@ extern "C" {
|
||||||
Another way to look at this is to say that the actual reference
|
Another way to look at this is to say that the actual reference
|
||||||
count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
|
count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
|
||||||
*/
|
*/
|
||||||
|
#ifdef INTERNED_STRINGS
|
||||||
static PyObject *interned = NULL;
|
static PyObject *interned = NULL;
|
||||||
|
#endif
|
||||||
|
|
||||||
/* The empty Unicode object is shared to improve performance. */
|
/* The empty Unicode object is shared to improve performance. */
|
||||||
static PyObject *unicode_empty = NULL;
|
static PyObject *unicode_empty = NULL;
|
||||||
|
@ -281,9 +288,16 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,
|
||||||
/* List of static strings. */
|
/* List of static strings. */
|
||||||
static _Py_Identifier *static_strings = NULL;
|
static _Py_Identifier *static_strings = NULL;
|
||||||
|
|
||||||
|
/* bpo-40521: Latin1 singletons are shared by all interpreters. */
|
||||||
|
#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
|
||||||
|
# define LATIN1_SINGLETONS
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef LATIN1_SINGLETONS
|
||||||
/* Single character Unicode strings in the Latin-1 range are being
|
/* Single character Unicode strings in the Latin-1 range are being
|
||||||
shared as well. */
|
shared as well. */
|
||||||
static PyObject *unicode_latin1[256] = {NULL};
|
static PyObject *unicode_latin1[256] = {NULL};
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Fast detection of the most frequent whitespace characters */
|
/* Fast detection of the most frequent whitespace characters */
|
||||||
const unsigned char _Py_ascii_whitespace[] = {
|
const unsigned char _Py_ascii_whitespace[] = {
|
||||||
|
@ -662,6 +676,7 @@ unicode_result_ready(PyObject *unicode)
|
||||||
return unicode_empty;
|
return unicode_empty;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef LATIN1_SINGLETONS
|
||||||
if (length == 1) {
|
if (length == 1) {
|
||||||
const void *data = PyUnicode_DATA(unicode);
|
const void *data = PyUnicode_DATA(unicode);
|
||||||
int kind = PyUnicode_KIND(unicode);
|
int kind = PyUnicode_KIND(unicode);
|
||||||
|
@ -683,6 +698,7 @@ unicode_result_ready(PyObject *unicode)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
assert(_PyUnicode_CheckConsistency(unicode, 1));
|
assert(_PyUnicode_CheckConsistency(unicode, 1));
|
||||||
return unicode;
|
return unicode;
|
||||||
|
@ -1913,10 +1929,12 @@ unicode_dealloc(PyObject *unicode)
|
||||||
case SSTATE_INTERNED_MORTAL:
|
case SSTATE_INTERNED_MORTAL:
|
||||||
/* revive dead object temporarily for DelItem */
|
/* revive dead object temporarily for DelItem */
|
||||||
Py_SET_REFCNT(unicode, 3);
|
Py_SET_REFCNT(unicode, 3);
|
||||||
|
#ifdef INTERNED_STRINGS
|
||||||
if (PyDict_DelItem(interned, unicode) != 0) {
|
if (PyDict_DelItem(interned, unicode) != 0) {
|
||||||
_PyErr_WriteUnraisableMsg("deletion of interned string failed",
|
_PyErr_WriteUnraisableMsg("deletion of interned string failed",
|
||||||
NULL);
|
NULL);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case SSTATE_INTERNED_IMMORTAL:
|
case SSTATE_INTERNED_IMMORTAL:
|
||||||
|
@ -1944,15 +1962,18 @@ unicode_dealloc(PyObject *unicode)
|
||||||
static int
|
static int
|
||||||
unicode_is_singleton(PyObject *unicode)
|
unicode_is_singleton(PyObject *unicode)
|
||||||
{
|
{
|
||||||
PyASCIIObject *ascii = (PyASCIIObject *)unicode;
|
if (unicode == unicode_empty) {
|
||||||
if (unicode == unicode_empty)
|
|
||||||
return 1;
|
return 1;
|
||||||
|
}
|
||||||
|
#ifdef LATIN1_SINGLETONS
|
||||||
|
PyASCIIObject *ascii = (PyASCIIObject *)unicode;
|
||||||
if (ascii->state.kind != PyUnicode_WCHAR_KIND && ascii->length == 1)
|
if (ascii->state.kind != PyUnicode_WCHAR_KIND && ascii->length == 1)
|
||||||
{
|
{
|
||||||
Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, 0);
|
Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, 0);
|
||||||
if (ch < 256 && unicode_latin1[ch] == unicode)
|
if (ch < 256 && unicode_latin1[ch] == unicode)
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -2094,16 +2115,28 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
|
||||||
static PyObject*
|
static PyObject*
|
||||||
get_latin1_char(unsigned char ch)
|
get_latin1_char(unsigned char ch)
|
||||||
{
|
{
|
||||||
PyObject *unicode = unicode_latin1[ch];
|
PyObject *unicode;
|
||||||
if (!unicode) {
|
|
||||||
unicode = PyUnicode_New(1, ch);
|
#ifdef LATIN1_SINGLETONS
|
||||||
if (!unicode)
|
unicode = unicode_latin1[ch];
|
||||||
return NULL;
|
if (unicode) {
|
||||||
PyUnicode_1BYTE_DATA(unicode)[0] = ch;
|
Py_INCREF(unicode);
|
||||||
assert(_PyUnicode_CheckConsistency(unicode, 1));
|
return unicode;
|
||||||
unicode_latin1[ch] = unicode;
|
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
unicode = PyUnicode_New(1, ch);
|
||||||
|
if (!unicode) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
PyUnicode_1BYTE_DATA(unicode)[0] = ch;
|
||||||
|
assert(_PyUnicode_CheckConsistency(unicode, 1));
|
||||||
|
|
||||||
|
#ifdef LATIN1_SINGLETONS
|
||||||
Py_INCREF(unicode);
|
Py_INCREF(unicode);
|
||||||
|
unicode_latin1[ch] = unicode;
|
||||||
|
#endif
|
||||||
return unicode;
|
return unicode;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -11270,7 +11303,6 @@ int
|
||||||
_PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)
|
_PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)
|
||||||
{
|
{
|
||||||
PyObject *right_uni;
|
PyObject *right_uni;
|
||||||
Py_hash_t hash;
|
|
||||||
|
|
||||||
assert(_PyUnicode_CHECK(left));
|
assert(_PyUnicode_CHECK(left));
|
||||||
assert(right->string);
|
assert(right->string);
|
||||||
|
@ -11302,10 +11334,12 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)
|
||||||
if (PyUnicode_CHECK_INTERNED(left))
|
if (PyUnicode_CHECK_INTERNED(left))
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
#ifdef INTERNED_STRINGS
|
||||||
assert(_PyUnicode_HASH(right_uni) != -1);
|
assert(_PyUnicode_HASH(right_uni) != -1);
|
||||||
hash = _PyUnicode_HASH(left);
|
Py_hash_t hash = _PyUnicode_HASH(left);
|
||||||
if (hash != -1 && hash != _PyUnicode_HASH(right_uni))
|
if (hash != -1 && hash != _PyUnicode_HASH(right_uni))
|
||||||
return 0;
|
return 0;
|
||||||
|
#endif
|
||||||
|
|
||||||
return unicode_compare_eq(left, right_uni);
|
return unicode_compare_eq(left, right_uni);
|
||||||
}
|
}
|
||||||
|
@ -15487,20 +15521,26 @@ void
|
||||||
PyUnicode_InternInPlace(PyObject **p)
|
PyUnicode_InternInPlace(PyObject **p)
|
||||||
{
|
{
|
||||||
PyObject *s = *p;
|
PyObject *s = *p;
|
||||||
PyObject *t;
|
|
||||||
#ifdef Py_DEBUG
|
#ifdef Py_DEBUG
|
||||||
assert(s != NULL);
|
assert(s != NULL);
|
||||||
assert(_PyUnicode_CHECK(s));
|
assert(_PyUnicode_CHECK(s));
|
||||||
#else
|
#else
|
||||||
if (s == NULL || !PyUnicode_Check(s))
|
if (s == NULL || !PyUnicode_Check(s)) {
|
||||||
return;
|
return;
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* If it's a subclass, we don't really know what putting
|
/* If it's a subclass, we don't really know what putting
|
||||||
it in the interned dict might do. */
|
it in the interned dict might do. */
|
||||||
if (!PyUnicode_CheckExact(s))
|
if (!PyUnicode_CheckExact(s)) {
|
||||||
return;
|
return;
|
||||||
if (PyUnicode_CHECK_INTERNED(s))
|
}
|
||||||
|
|
||||||
|
if (PyUnicode_CHECK_INTERNED(s)) {
|
||||||
return;
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef INTERNED_STRINGS
|
||||||
if (interned == NULL) {
|
if (interned == NULL) {
|
||||||
interned = PyDict_New();
|
interned = PyDict_New();
|
||||||
if (interned == NULL) {
|
if (interned == NULL) {
|
||||||
|
@ -15508,22 +15548,28 @@ PyUnicode_InternInPlace(PyObject **p)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
PyObject *t;
|
||||||
Py_ALLOW_RECURSION
|
Py_ALLOW_RECURSION
|
||||||
t = PyDict_SetDefault(interned, s, s);
|
t = PyDict_SetDefault(interned, s, s);
|
||||||
Py_END_ALLOW_RECURSION
|
Py_END_ALLOW_RECURSION
|
||||||
|
|
||||||
if (t == NULL) {
|
if (t == NULL) {
|
||||||
PyErr_Clear();
|
PyErr_Clear();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (t != s) {
|
if (t != s) {
|
||||||
Py_INCREF(t);
|
Py_INCREF(t);
|
||||||
Py_SETREF(*p, t);
|
Py_SETREF(*p, t);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* The two references in interned are not counted by refcnt.
|
/* The two references in interned are not counted by refcnt.
|
||||||
The deallocator will take care of this */
|
The deallocator will take care of this */
|
||||||
Py_SET_REFCNT(s, Py_REFCNT(s) - 2);
|
Py_SET_REFCNT(s, Py_REFCNT(s) - 2);
|
||||||
_PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL;
|
_PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
@ -16109,9 +16155,11 @@ _PyUnicode_Fini(PyThreadState *tstate)
|
||||||
|
|
||||||
Py_CLEAR(unicode_empty);
|
Py_CLEAR(unicode_empty);
|
||||||
|
|
||||||
|
#ifdef LATIN1_SINGLETONS
|
||||||
for (Py_ssize_t i = 0; i < 256; i++) {
|
for (Py_ssize_t i = 0; i < 256; i++) {
|
||||||
Py_CLEAR(unicode_latin1[i]);
|
Py_CLEAR(unicode_latin1[i]);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
_PyUnicode_ClearStaticStrings();
|
_PyUnicode_ClearStaticStrings();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue