From 7a3dae056d8022a8308b5aa8ac026b0436d66eb5 Mon Sep 17 00:00:00 2001 From: Richard Oudkerk Date: Sun, 5 May 2013 23:05:00 +0100 Subject: [PATCH] Issue #15528: Add weakref.finalize to support finalization using weakref callbacks. This is 2e446e87ac5b except that collections/__init__.py has been modified to import proxy from _weakref instead of weakref. This eliminates an import cycle which seems to cause a problem on Unix but not Windows. --- Doc/library/weakref.rst | 215 +++++++++++++++++++++++++++++++++++- Lib/collections/__init__.py | 2 +- Lib/test/test_weakref.py | 152 ++++++++++++++++++++++++- Lib/weakref.py | 137 ++++++++++++++++++++++- Misc/NEWS | 3 + 5 files changed, 501 insertions(+), 8 deletions(-) diff --git a/Doc/library/weakref.rst b/Doc/library/weakref.rst index ec50107c752..5b5e4601564 100644 --- a/Doc/library/weakref.rst +++ b/Doc/library/weakref.rst @@ -51,10 +51,15 @@ garbage collection. :class:`WeakSet` implements the :class:`set` interface, but keeps weak references to its elements, just like a :class:`WeakKeyDictionary` does. -Most programs should find that using one of these weak container types is all -they need -- it's not usually necessary to create your own weak references -directly. The low-level machinery used by the weak dictionary implementations -is exposed by the :mod:`weakref` module for the benefit of advanced uses. +:class:`finalize` provides a straight forward way to register a +cleanup function to be called when an object is garbage collected. +This is simpler to use than setting up a callback function on a raw +weak reference. + +Most programs should find that using one of these weak container types +or :class:`finalize` is all they need -- it's not usually necessary to +create your own weak references directly. The low-level machinery is +exposed by the :mod:`weakref` module for the benefit of advanced uses. 
Not all objects can be weakly referenced; those objects which can include class instances, functions written in Python (but not in C), instance methods, sets, @@ -117,7 +122,16 @@ Extension types can easily be made to support weak references; see weakref. If there is no callback or if the referent of the weakref is no longer alive then this attribute will have value ``None``. - .. versionadded:: 3.4 + .. note:: + + Like :meth:`__del__` methods, weak reference callbacks can be + called during interpreter shutdown when module globals have been + overwritten with :const:`None`. This can make writing robust + weak reference callbacks a challenge. Callbacks registered + using :class:`finalize` do not have to worry about this issue + because they will not be run after module teardown has begun. + + .. versionchanged:: 3.4 Added the :attr:`__callback__` attribute. @@ -229,6 +243,66 @@ These method have the same issues as the and :meth:`keyrefs` method of .. versionadded:: 3.4 +.. class:: finalize(obj, func, *args, **kwargs) + + Return a callable finalizer object which will be called when *obj* + is garbage collected. A finalizer is *alive* until it is called + (either explicitly or at garbage collection), and after that it is + *dead*. Calling a live finalizer returns the result of evaluating + ``func(*args, **kwargs)``, whereas calling a dead finalizer returns + :const:`None`. + + Exceptions raised by finalizer callbacks during garbage collection + will be shown on the standard error output, but cannot be + propagated. They are handled in the same way as exceptions raised + from an object's :meth:`__del__` method or a weak reference's + callback. + + When the program exits, each remaining live finalizer is called + unless its :attr:`atexit` attribute has been set to false. They + are called in reverse order of creation. 
+ + A finalizer will never invoke its callback during the later part of + the interpreter shutdown when module globals are liable to have + been replaced by :const:`None`. + + .. method:: __call__() + + If *self* is alive then mark it as dead and return the result of + calling ``func(*args, **kwargs)``. If *self* is dead then return + :const:`None`. + + .. method:: detach() + + If *self* is alive then mark it as dead and return the tuple + ``(obj, func, args, kwargs)``. If *self* is dead then return + :const:`None`. + + .. method:: peek() + + If *self* is alive then return the tuple ``(obj, func, args, + kwargs)``. If *self* is dead then return :const:`None`. + + .. attribute:: alive + + Property which is true if the finalizer is alive, false otherwise. + + .. attribute:: atexit + + A writable boolean property which by default is true. When the + program exits, it calls all remaining live finalizers for which + :attr:`.atexit` is true. They are called in reverse order of + creation. + + .. note:: + + It is important to ensure that *func*, *args* and *kwargs* do + not own any references to *obj*, either directly or indirectly, + since otherwise *obj* will never be garbage collected. In + particular, *func* should not be a bound method of *obj*. + + .. versionadded:: 3.4 + .. data:: ReferenceType @@ -365,3 +439,134 @@ objects can still be retrieved by ID if they do. def id2obj(oid): return _id2obj_dict[oid] + +.. _finalize-examples: + +Finalizer Objects +----------------- + +Often one uses :class:`finalize` to register a callback without +bothering to keep the returned finalizer object. For instance + + >>> import weakref + >>> class Object: + ... pass + ... + >>> kenny = Object() + >>> weakref.finalize(kenny, print, "You killed Kenny!") #doctest:+ELLIPSIS + <finalize object at ...; for 'Object' at ...> + >>> del kenny + You killed Kenny! + +The finalizer can be called directly as well. However the finalizer +will invoke the callback at most once. + + >>> def callback(x, y, z): + ... print("CALLBACK") + ... 
return x + y + z + ... + >>> obj = Object() + >>> f = weakref.finalize(obj, callback, 1, 2, z=3) + >>> assert f.alive + >>> assert f() == 6 + CALLBACK + >>> assert not f.alive + >>> f() # callback not called because finalizer dead + >>> del obj # callback not called because finalizer dead + +You can unregister a finalizer using its :meth:`~finalize.detach` +method. This kills the finalizer and returns the arguments passed to +the constructor when it was created. + + >>> obj = Object() + >>> f = weakref.finalize(obj, callback, 1, 2, z=3) + >>> f.detach() #doctest:+ELLIPSIS + (<__main__.Object object ...>, <function callback ...>, (1, 2), {'z': 3}) + >>> newobj, func, args, kwargs = _ + >>> assert not f.alive + >>> assert newobj is obj + >>> assert func(*args, **kwargs) == 6 + CALLBACK + +Unless you set the :attr:`~finalize.atexit` attribute to +:const:`False`, a finalizer will be called when the program exits if it +is still alive. For instance + + >>> obj = Object() + >>> weakref.finalize(obj, print, "obj dead or exiting") #doctest:+ELLIPSIS + <finalize object at ...; for 'Object' at ...> + >>> exit() #doctest:+SKIP + obj dead or exiting + + +Comparing finalizers with :meth:`__del__` methods +------------------------------------------------- + +Suppose we want to create a class whose instances represent temporary +directories. The directories should be deleted with their contents +when the first of the following events occurs: + +* the object is garbage collected, +* the object's :meth:`remove` method is called, or +* the program exits. 
+ +We might try to implement the class using a :meth:`__del__` method as +follows:: + + class TempDir: + def __init__(self): + self.name = tempfile.mkdtemp() + + def remove(self): + if self.name is not None: + shutil.rmtree(self.name) + self.name = None + + @property + def removed(self): + return self.name is None + + def __del__(self): + self.remove() + +This solution has a couple of serious problems: + +* There is no guarantee that the object will be garbage collected + before the program exits, so the directory might be left. This is + because reference cycles containing an object with a :meth:`__del__` + method can never be collected. And even if the :class:`TempDir` + object is not itself part of a reference cycle, it may still be kept + alive by some unknown uncollectable reference cycle. + +* The :meth:`__del__` method may be called at shutdown after the + :mod:`shutil` module has been cleaned up, in which case + :attr:`shutil.rmtree` will have been replaced by :const:`None`. + This will cause the :meth:`__del__` method to fail and the directory + will not be removed. + +Using finalizers we can avoid these problems:: + + class TempDir: + def __init__(self): + self.name = tempfile.mkdtemp() + self._finalizer = weakref.finalize(self, shutil.rmtree, self.name) + + def remove(self): + self._finalizer() + + @property + def removed(self): + return not self._finalizer.alive + +Defined like this, even if a :class:`TempDir` object is part of a +reference cycle, that reference cycle can still be garbage collected. +If the object never gets garbage collected the finalizer will still be +called at exit. + +.. note:: + + If you create a finalizer object in a daemonic thread just as the + program exits then there is the possibility that the finalizer + does not get called at exit. However, in a daemonic thread + :func:`atexit.register`, ``try: ... finally: ...`` and ``with: ...`` + do not guarantee that cleanup occurs either. 
diff --git a/Lib/collections/__init__.py b/Lib/collections/__init__.py index e7441d25898..94533206986 100644 --- a/Lib/collections/__init__.py +++ b/Lib/collections/__init__.py @@ -12,7 +12,7 @@ from operator import itemgetter as _itemgetter, eq as _eq from keyword import iskeyword as _iskeyword import sys as _sys import heapq as _heapq -from weakref import proxy as _proxy +from _weakref import proxy as _proxy from itertools import repeat as _repeat, chain as _chain, starmap as _starmap from reprlib import recursive_repr as _recursive_repr diff --git a/Lib/test/test_weakref.py b/Lib/test/test_weakref.py index e32e2485615..551d95cb91b 100644 --- a/Lib/test/test_weakref.py +++ b/Lib/test/test_weakref.py @@ -7,11 +7,15 @@ import operator import contextlib import copy -from test import support +from test import support, script_helper # Used in ReferencesTestCase.test_ref_created_during_del() . ref_from_del = None +# Used by FinalizeTestCase as a global that may be replaced by None +# when the interpreter shuts down. 
+_global_var = 'foobar' + class C: def method(self): pass @@ -1551,6 +1555,151 @@ class WeakKeyDictionaryTestCase(mapping_tests.BasicTestMappingProtocol): def _reference(self): return self.__ref.copy() + +class FinalizeTestCase(unittest.TestCase): + + class A: + pass + + def _collect_if_necessary(self): + # we create no ref-cycles so in CPython no gc should be needed + if sys.implementation.name != 'cpython': + support.gc_collect() + + def test_finalize(self): + def add(x,y,z): + res.append(x + y + z) + return x + y + z + + a = self.A() + + res = [] + f = weakref.finalize(a, add, 67, 43, z=89) + self.assertEqual(f.alive, True) + self.assertEqual(f.peek(), (a, add, (67,43), {'z':89})) + self.assertEqual(f(), 199) + self.assertEqual(f(), None) + self.assertEqual(f(), None) + self.assertEqual(f.peek(), None) + self.assertEqual(f.detach(), None) + self.assertEqual(f.alive, False) + self.assertEqual(res, [199]) + + res = [] + f = weakref.finalize(a, add, 67, 43, 89) + self.assertEqual(f.peek(), (a, add, (67,43,89), {})) + self.assertEqual(f.detach(), (a, add, (67,43,89), {})) + self.assertEqual(f(), None) + self.assertEqual(f(), None) + self.assertEqual(f.peek(), None) + self.assertEqual(f.detach(), None) + self.assertEqual(f.alive, False) + self.assertEqual(res, []) + + res = [] + f = weakref.finalize(a, add, x=67, y=43, z=89) + del a + self._collect_if_necessary() + self.assertEqual(f(), None) + self.assertEqual(f(), None) + self.assertEqual(f.peek(), None) + self.assertEqual(f.detach(), None) + self.assertEqual(f.alive, False) + self.assertEqual(res, [199]) + + def test_order(self): + a = self.A() + res = [] + + f1 = weakref.finalize(a, res.append, 'f1') + f2 = weakref.finalize(a, res.append, 'f2') + f3 = weakref.finalize(a, res.append, 'f3') + f4 = weakref.finalize(a, res.append, 'f4') + f5 = weakref.finalize(a, res.append, 'f5') + + # make sure finalizers can keep themselves alive + del f1, f4 + + self.assertTrue(f2.alive) + self.assertTrue(f3.alive) + 
self.assertTrue(f5.alive) + + self.assertTrue(f5.detach()) + self.assertFalse(f5.alive) + + f5() # nothing because previously unregistered + res.append('A') + f3() # => res.append('f3') + self.assertFalse(f3.alive) + res.append('B') + f3() # nothing because previously called + res.append('C') + del a + self._collect_if_necessary() + # => res.append('f4') + # => res.append('f2') + # => res.append('f1') + self.assertFalse(f2.alive) + res.append('D') + f2() # nothing because previously called by gc + + expected = ['A', 'f3', 'B', 'C', 'f4', 'f2', 'f1', 'D'] + self.assertEqual(res, expected) + + def test_all_freed(self): + # we want a weakrefable subclass of weakref.finalize + class MyFinalizer(weakref.finalize): + pass + + a = self.A() + res = [] + def callback(): + res.append(123) + f = MyFinalizer(a, callback) + + wr_callback = weakref.ref(callback) + wr_f = weakref.ref(f) + del callback, f + + self.assertIsNotNone(wr_callback()) + self.assertIsNotNone(wr_f()) + + del a + self._collect_if_necessary() + + self.assertIsNone(wr_callback()) + self.assertIsNone(wr_f()) + self.assertEqual(res, [123]) + + @classmethod + def run_in_child(cls): + def error(): + # Create an atexit finalizer from inside a finalizer called + # at exit. This should be the next to be run. 
+ g1 = weakref.finalize(cls, print, 'g1') + print('f3 error') + 1/0 + + # cls should stay alive till atexit callbacks run + f1 = weakref.finalize(cls, print, 'f1', _global_var) + f2 = weakref.finalize(cls, print, 'f2', _global_var) + f3 = weakref.finalize(cls, error) + f4 = weakref.finalize(cls, print, 'f4', _global_var) + + assert f1.atexit == True + f2.atexit = False + assert f3.atexit == True + assert f4.atexit == True + + def test_atexit(self): + prog = ('from test.test_weakref import FinalizeTestCase;'+ + 'FinalizeTestCase.run_in_child()') + rc, out, err = script_helper.assert_python_ok('-c', prog) + out = out.decode('ascii').splitlines() + self.assertEqual(out, ['f4 foobar', 'f3 error', 'g1', 'f1 foobar']) + self.assertTrue(b'ZeroDivisionError' in err) + + libreftest = """ Doctest for examples in the library reference: weakref.rst >>> import weakref @@ -1644,6 +1793,7 @@ def test_main(): WeakValueDictionaryTestCase, WeakKeyDictionaryTestCase, SubclassableWeakrefTestCase, + FinalizeTestCase, ) support.run_doctest(sys.modules[__name__]) diff --git a/Lib/weakref.py b/Lib/weakref.py index 8f9c107aa6f..4c0b26e15ab 100644 --- a/Lib/weakref.py +++ b/Lib/weakref.py @@ -21,13 +21,16 @@ from _weakref import ( from _weakrefset import WeakSet, _IterationGuard import collections # Import after _weakref to avoid circular import. 
+import sys +import itertools +import atexit ProxyTypes = (ProxyType, CallableProxyType) __all__ = ["ref", "proxy", "getweakrefcount", "getweakrefs", "WeakKeyDictionary", "ReferenceType", "ProxyType", "CallableProxyType", "ProxyTypes", "WeakValueDictionary", - "WeakSet", "WeakMethod"] + "WeakSet", "WeakMethod", "finalize"] class WeakMethod(ref): @@ -436,3 +439,135 @@ class WeakKeyDictionary(collections.MutableMapping): d[ref(key, self._remove)] = value if len(kwargs): self.update(kwargs) + + +class finalize: + """Class for finalization of weakrefable objects + + finalize(obj, func, *args, **kwargs) returns a callable finalizer + object which will be called when obj is garbage collected. The + first time the finalizer is called it evaluates func(*arg, **kwargs) + and returns the result. After this the finalizer is dead, and + calling it just returns None. + + When the program exits any remaining finalizers for which the + atexit attribute is true will be run in reverse order of creation. + By default atexit is true. + """ + + # Finalizer objects don't have any state of their own. They are + # just used as keys to lookup _Info objects in the registry. This + # ensures that they cannot be part of a ref-cycle. 
+ + __slots__ = () + _registry = {} + _shutdown = False + _index_iter = itertools.count() + _dirty = False + + class _Info: + __slots__ = ("weakref", "func", "args", "kwargs", "atexit", "index") + + def __init__(self, obj, func, *args, **kwargs): + info = self._Info() + info.weakref = ref(obj, self) + info.func = func + info.args = args + info.kwargs = kwargs or None + info.atexit = True + info.index = next(self._index_iter) + self._registry[self] = info + finalize._dirty = True + + def __call__(self, _=None): + """If alive then mark as dead and return func(*args, **kwargs); + otherwise return None""" + info = self._registry.pop(self, None) + if info and not self._shutdown: + return info.func(*info.args, **(info.kwargs or {})) + + def detach(self): + """If alive then mark as dead and return (obj, func, args, kwargs); + otherwise return None""" + info = self._registry.get(self) + obj = info and info.weakref() + if obj is not None and self._registry.pop(self, None): + return (obj, info.func, info.args, info.kwargs or {}) + + def peek(self): + """If alive then return (obj, func, args, kwargs); + otherwise return None""" + info = self._registry.get(self) + obj = info and info.weakref() + if obj is not None: + return (obj, info.func, info.args, info.kwargs or {}) + + @property + def alive(self): + """Whether finalizer is alive""" + return self in self._registry + + @property + def atexit(self): + """Whether finalizer should be called at exit""" + info = self._registry.get(self) + return bool(info) and info.atexit + + @atexit.setter + def atexit(self, value): + info = self._registry.get(self) + if info: + info.atexit = bool(value) + + def __repr__(self): + info = self._registry.get(self) + obj = info and info.weakref() + if obj is None: + return '<%s object at %#x; dead>' % (type(self).__name__, id(self)) + else: + return '<%s object at %#x; for %r at %#x>' % \ + (type(self).__name__, id(self), type(obj).__name__, id(obj)) + + @classmethod + def _select_for_exit(cls): + 
# Return live finalizers marked for exit, oldest first + L = [(f,i) for (f,i) in cls._registry.items() if i.atexit] + L.sort(key=lambda item:item[1].index) + return [f for (f,i) in L] + + @classmethod + def _exitfunc(cls): + # At shutdown invoke finalizers for which atexit is true. + # This is called once all other non-daemonic threads have been + # joined. + reenable_gc = False + try: + if cls._registry: + import gc + if gc.isenabled(): + reenable_gc = True + gc.disable() + pending = None + while True: + if pending is None or finalize._dirty: + pending = cls._select_for_exit() + finalize._dirty = False + if not pending: + break + f = pending.pop() + try: + # gc is disabled, so (assuming no daemonic + # threads) the following is the only line in + # this function which might trigger creation + # of a new finalizer + f() + except Exception: + sys.excepthook(*sys.exc_info()) + assert f not in cls._registry + finally: + # prevent any more finalizers from executing during shutdown + finalize._shutdown = True + if reenable_gc: + gc.enable() + +atexit.register(finalize._exitfunc) diff --git a/Misc/NEWS b/Misc/NEWS index 0ff2e17e1c4..f7147a2fa9f 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -74,6 +74,9 @@ Core and Builtins Library ------- +- Issue #15528: Add weakref.finalize to support finalization using + weakref callbacks. + - Issue #14173: Avoid crashing when reading a signal handler during interpreter shutdown.