From 8227883d1f1bbb6560e5f175d7ee49f013c094bd Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Wed, 24 Apr 2024 15:55:02 +0100 Subject: [PATCH] gh-118013: Use weakrefs for the cache key in `inspect._shadowed_dict` (#118202) --- Doc/whatsnew/3.12.rst | 7 +++--- Lib/inspect.py | 25 ++++++++++++++++--- Lib/test/libregrtest/utils.py | 2 +- Lib/test/test_inspect/test_inspect.py | 24 ++++++++++++++++++ ...-04-24-12-20-48.gh-issue-118013.TKn_kZ.rst | 9 +++++++ 5 files changed, 59 insertions(+), 8 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-04-24-12-20-48.gh-issue-118013.TKn_kZ.rst diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index ce3d9ec6a29..8757311a484 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -734,8 +734,7 @@ inspect * The performance of :func:`inspect.getattr_static` has been considerably improved. Most calls to the function should be at least 2x faster than they - were in Python 3.11, and some may be 6x faster or more. (Contributed by Alex - Waygood in :gh:`103193`.) + were in Python 3.11. (Contributed by Alex Waygood in :gh:`103193`.) itertools --------- @@ -1006,8 +1005,8 @@ typing :func:`runtime-checkable protocols ` has changed significantly. Most ``isinstance()`` checks against protocols with only a few members should be at least 2x faster than in 3.11, and some may be 20x - faster or more. However, ``isinstance()`` checks against protocols with fourteen - or more members may be slower than in Python 3.11. (Contributed by Alex + faster or more. However, ``isinstance()`` checks against protocols with many + members may be slower than in Python 3.11. (Contributed by Alex Waygood in :gh:`74690` and :gh:`103193`.) * All :data:`typing.TypedDict` and :data:`typing.NamedTuple` classes now have the diff --git a/Lib/inspect.py b/Lib/inspect.py index 422c09a92ad..3c346b27b1f 100644 --- a/Lib/inspect.py +++ b/Lib/inspect.py @@ -160,6 +160,7 @@ import builtins from keyword import iskeyword from operator import attrgetter from collections import namedtuple, OrderedDict +from weakref import ref as make_weakref # Create constants for the compiler flags in Include/code.h # We try to get them from dis to avoid duplication @@ -1832,9 +1833,16 @@ def _check_class(klass, attr): return entry.__dict__[attr] return _sentinel + @functools.lru_cache() -def _shadowed_dict_from_mro_tuple(mro): - for entry in mro: +def _shadowed_dict_from_weakref_mro_tuple(*weakref_mro): + for weakref_entry in weakref_mro: + # Normally we'd have to check whether the result of weakref_entry() + # is None here, in case the object the weakref is pointing to has died. + # In this specific case, however, we know that the only caller of this + # function is `_shadowed_dict()`, and that therefore this weakref is + # guaranteed to point to an object that is still alive. + entry = weakref_entry() dunder_dict = _get_dunder_dict_of_class(entry) if '__dict__' in dunder_dict: class_dict = dunder_dict['__dict__'] @@ -1844,8 +1852,19 @@ def _shadowed_dict_from_mro_tuple(mro): return class_dict return _sentinel + def _shadowed_dict(klass): - return _shadowed_dict_from_mro_tuple(_static_getmro(klass)) + # gh-118013: the inner function here is decorated with lru_cache for + # performance reasons, *but* make sure not to pass strong references + # to the items in the mro. Doing so can lead to unexpected memory + # consumption in cases where classes are dynamically created and + # destroyed, and the dynamically created classes happen to be the only + # objects that hold strong references to other objects that take up a + # significant amount of memory. + return _shadowed_dict_from_weakref_mro_tuple( + *[make_weakref(entry) for entry in _static_getmro(klass)] + ) + def getattr_static(obj, attr, default=_sentinel): """Retrieve attributes without triggering dynamic lookup via the diff --git a/Lib/test/libregrtest/utils.py b/Lib/test/libregrtest/utils.py index 791f996127e..8253d330b95 100644 --- a/Lib/test/libregrtest/utils.py +++ b/Lib/test/libregrtest/utils.py @@ -275,7 +275,7 @@ def clear_caches(): except KeyError: pass else: - inspect._shadowed_dict_from_mro_tuple.cache_clear() + inspect._shadowed_dict_from_weakref_mro_tuple.cache_clear() inspect._filesbymodname.clear() inspect.modulesbyfile.clear() diff --git a/Lib/test/test_inspect/test_inspect.py b/Lib/test/test_inspect/test_inspect.py index b2265e44e0c..169d1edb706 100644 --- a/Lib/test/test_inspect/test_inspect.py +++ b/Lib/test/test_inspect/test_inspect.py @@ -4,6 +4,7 @@ import collections import copy import datetime import functools +import gc import importlib import inspect import io @@ -25,6 +26,7 @@ import unicodedata import unittest import unittest.mock import warnings +import weakref try: @@ -2302,6 +2304,13 @@ class TestGetattrStatic(unittest.TestCase): self.assertEqual(inspect.getattr_static(foo, 'a'), 3) self.assertFalse(test.called) + class Bar(Foo): pass + + bar = Bar() + bar.a = 5 + self.assertEqual(inspect.getattr_static(bar, 'a'), 3) + self.assertFalse(test.called) + def test_mutated_mro(self): test = self test.called = False @@ -2406,6 +2415,21 @@ class TestGetattrStatic(unittest.TestCase): self.assertFalse(test.called) + def test_cache_does_not_cause_classes_to_persist(self): + # regression test for gh-118013: + # check that the internal _shadowed_dict cache does not cause + # dynamically created classes to have extended lifetimes even + # when no other strong references to those classes remain. + # Since these classes can themselves hold strong references to + # other objects, this can cause unexpected memory consumption. + class Foo: pass + Foo.instance = Foo() + weakref_to_class = weakref.ref(Foo) + inspect.getattr_static(Foo.instance, 'whatever', 'irrelevant') + del Foo + gc.collect() + self.assertIsNone(weakref_to_class()) + class TestGetGeneratorState(unittest.TestCase): diff --git a/Misc/NEWS.d/next/Library/2024-04-24-12-20-48.gh-issue-118013.TKn_kZ.rst b/Misc/NEWS.d/next/Library/2024-04-24-12-20-48.gh-issue-118013.TKn_kZ.rst new file mode 100644 index 00000000000..8eb68ebe99b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-04-24-12-20-48.gh-issue-118013.TKn_kZ.rst @@ -0,0 +1,9 @@ +Fix regression introduced in gh-103193 that meant that calling +:func:`inspect.getattr_static` on an instance would cause a strong reference +to that instance's class to persist in an internal cache in the +:mod:`inspect` module. This caused unexpected memory consumption if the +class was dynamically created, the class held strong references to other +objects which took up a significant amount of memory, and the cache +contained the sole strong reference to the class. The fix for the regression +leads to a slowdown in :func:`getattr_static`, but the function should still +be signficantly faster than it was in Python 3.11. Patch by Alex Waygood.