gh-118013: Use weakrefs for the cache key in `inspect._shadowed_dict` (#118202)

This commit is contained in:
Alex Waygood 2024-04-24 15:55:02 +01:00 committed by GitHub
parent 83235f7791
commit 8227883d1f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 59 additions and 8 deletions

View File

@ -734,8 +734,7 @@ inspect
* The performance of :func:`inspect.getattr_static` has been considerably
improved. Most calls to the function should be at least 2x faster than they
were in Python 3.11, and some may be 6x faster or more. (Contributed by Alex
Waygood in :gh:`103193`.)
were in Python 3.11. (Contributed by Alex Waygood in :gh:`103193`.)
itertools
---------
@ -1006,8 +1005,8 @@ typing
:func:`runtime-checkable protocols <typing.runtime_checkable>` has changed
significantly. Most ``isinstance()`` checks against protocols with only a few
members should be at least 2x faster than in 3.11, and some may be 20x
faster or more. However, ``isinstance()`` checks against protocols with fourteen
or more members may be slower than in Python 3.11. (Contributed by Alex
faster or more. However, ``isinstance()`` checks against protocols with many
members may be slower than in Python 3.11. (Contributed by Alex
Waygood in :gh:`74690` and :gh:`103193`.)
* All :data:`typing.TypedDict` and :data:`typing.NamedTuple` classes now have the

View File

@ -160,6 +160,7 @@ import builtins
from keyword import iskeyword
from operator import attrgetter
from collections import namedtuple, OrderedDict
from weakref import ref as make_weakref
# Create constants for the compiler flags in Include/code.h
# We try to get them from dis to avoid duplication
@ -1832,9 +1833,16 @@ def _check_class(klass, attr):
return entry.__dict__[attr]
return _sentinel
@functools.lru_cache()
def _shadowed_dict_from_mro_tuple(mro):
for entry in mro:
def _shadowed_dict_from_weakref_mro_tuple(*weakref_mro):
for weakref_entry in weakref_mro:
# Normally we'd have to check whether the result of weakref_entry()
# is None here, in case the object the weakref is pointing to has died.
# In this specific case, however, we know that the only caller of this
# function is `_shadowed_dict()`, and that therefore this weakref is
# guaranteed to point to an object that is still alive.
entry = weakref_entry()
dunder_dict = _get_dunder_dict_of_class(entry)
if '__dict__' in dunder_dict:
class_dict = dunder_dict['__dict__']
@ -1844,8 +1852,19 @@ def _shadowed_dict_from_mro_tuple(mro):
return class_dict
return _sentinel
def _shadowed_dict(klass):
return _shadowed_dict_from_mro_tuple(_static_getmro(klass))
# gh-118013: the inner function here is decorated with lru_cache for
# performance reasons, *but* make sure not to pass strong references
# to the items in the mro. Doing so can lead to unexpected memory
# consumption in cases where classes are dynamically created and
# destroyed, and the dynamically created classes happen to be the only
# objects that hold strong references to other objects that take up a
# significant amount of memory.
return _shadowed_dict_from_weakref_mro_tuple(
*[make_weakref(entry) for entry in _static_getmro(klass)]
)
def getattr_static(obj, attr, default=_sentinel):
"""Retrieve attributes without triggering dynamic lookup via the

View File

@ -275,7 +275,7 @@ def clear_caches():
except KeyError:
pass
else:
inspect._shadowed_dict_from_mro_tuple.cache_clear()
inspect._shadowed_dict_from_weakref_mro_tuple.cache_clear()
inspect._filesbymodname.clear()
inspect.modulesbyfile.clear()

View File

@ -4,6 +4,7 @@ import collections
import copy
import datetime
import functools
import gc
import importlib
import inspect
import io
@ -25,6 +26,7 @@ import unicodedata
import unittest
import unittest.mock
import warnings
import weakref
try:
@ -2302,6 +2304,13 @@ class TestGetattrStatic(unittest.TestCase):
self.assertEqual(inspect.getattr_static(foo, 'a'), 3)
self.assertFalse(test.called)
class Bar(Foo): pass
bar = Bar()
bar.a = 5
self.assertEqual(inspect.getattr_static(bar, 'a'), 3)
self.assertFalse(test.called)
def test_mutated_mro(self):
test = self
test.called = False
@ -2406,6 +2415,21 @@ class TestGetattrStatic(unittest.TestCase):
self.assertFalse(test.called)
def test_cache_does_not_cause_classes_to_persist(self):
# regression test for gh-118013:
# check that the internal _shadowed_dict cache does not cause
# dynamically created classes to have extended lifetimes even
# when no other strong references to those classes remain.
# Since these classes can themselves hold strong references to
# other objects, this can cause unexpected memory consumption.
class Foo: pass
Foo.instance = Foo()
weakref_to_class = weakref.ref(Foo)
inspect.getattr_static(Foo.instance, 'whatever', 'irrelevant')
del Foo
gc.collect()
self.assertIsNone(weakref_to_class())
class TestGetGeneratorState(unittest.TestCase):

View File

@ -0,0 +1,9 @@
Fix regression introduced in gh-103193 that meant that calling
:func:`inspect.getattr_static` on an instance would cause a strong reference
to that instance's class to persist in an internal cache in the
:mod:`inspect` module. This caused unexpected memory consumption if the
class was dynamically created, the class held strong references to other
objects which took up a significant amount of memory, and the cache
contained the sole strong reference to the class. The fix for the regression
leads to a slowdown in :func:`getattr_static`, but the function should still
be signficantly faster than it was in Python 3.11. Patch by Alex Waygood.