From ead8b7ab3008bda9b6a50d6d9d02ed68dab3b0fd Mon Sep 17 00:00:00 2001 From: Tim Peters Date: Sat, 30 Oct 2004 23:09:22 +0000 Subject: [PATCH] SF 1055820: weakref callback vs gc vs threads In cyclic gc, clear weakrefs to unreachable objects before allowing any Python code (weakref callbacks or __del__ methods) to run. This is a critical bugfix, affecting all versions of Python since weakrefs were introduced. I'll backport to 2.3. --- Include/weakrefobject.h | 20 +++ Lib/test/test_gc.py | 199 +++++++++++++++++++++++++++ Misc/NEWS | 11 ++ Modules/gc_weakref.txt | 114 +++++++++++++++- Modules/gcmodule.c | 293 +++++++++++++++++++++++++--------------- Objects/weakrefobject.c | 4 +- 6 files changed, 531 insertions(+), 110 deletions(-) diff --git a/Include/weakrefobject.h b/Include/weakrefobject.h index 3503892e0e6..daf490f50c2 100644 --- a/Include/weakrefobject.h +++ b/Include/weakrefobject.h @@ -9,11 +9,31 @@ extern "C" { typedef struct _PyWeakReference PyWeakReference; +/* PyWeakReference is the base struct for the Python ReferenceType, ProxyType, + * and CallableProxyType. + */ struct _PyWeakReference { PyObject_HEAD + + /* The object to which this is a weak reference, or Py_None if none. + * Note that this is a stealth reference: wr_object's refcount is + * not incremented to reflect this pointer. + */ PyObject *wr_object; + + /* A callable to invoke when wr_object dies, or NULL if none. */ PyObject *wr_callback; + + /* A cache for wr_object's hash code. As usual for hashes, this is -1 + * if the hash code isn't known yet. + */ long hash; + + /* If wr_object is weakly referenced, wr_object has a doubly-linked NULL- + * terminated list of weak references to it. These are the list pointers. + * If wr_object goes away, wr_object is set to Py_None, and these pointers + * have no meaning then. + */ PyWeakReference *wr_prev; PyWeakReference *wr_next; }; diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index 50df81826da..fa8659e1684 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -1,6 +1,7 @@ from test.test_support import verify, verbose, TestFailed, vereq import sys import gc +import weakref def expect(actual, expected, name): if actual != expected: @@ -369,6 +370,191 @@ def test_get_referents(): expect(gc.get_referents(1, 'a', 4j), [], "get_referents") +# Bug 1055820 has several tests of longstanding bugs involving weakrefs and +# cyclic gc. + +# An instance of C1055820 has a self-loop, so becomes cyclic trash when +# unreachable. +class C1055820(object): + def __init__(self, i): + self.i = i + self.loop = self + +class GC_Detector(object): + # Create an instance I. Then gc hasn't happened again so long as + # I.gc_happened is false. + + def __init__(self): + self.gc_happened = False + + def it_happened(ignored): + self.gc_happened = True + + # Create a piece of cyclic trash that triggers it_happened when + # gc collects it. + self.wr = weakref.ref(C1055820(666), it_happened) + +def test_bug1055820b(): + # Corresponds to temp2b.py in the bug report. + + ouch = [] + def callback(ignored): + ouch[:] = [wr() for wr in WRs] + + Cs = [C1055820(i) for i in range(2)] + WRs = [weakref.ref(c, callback) for c in Cs] + c = None + + gc.collect() + expect(len(ouch), 0, "bug1055820b") + # Make the two instances trash, and collect again. The bug was that + # the callback materialized a strong reference to an instance, but gc + # cleared the instance's dict anyway. + Cs = None + gc.collect() + expect(len(ouch), 2, "bug1055820b") # else the callbacks didn't run + for x in ouch: + # If the callback resurrected one of these guys, the instance + # would be damaged, with an empty __dict__. + expect(x, None, "bug1055820b") + +def test_bug1055820c(): + # Corresponds to temp2c.py in the bug report. This is pretty elaborate. + + c0 = C1055820(0) + # Move c0 into generation 2. + gc.collect() + + c1 = C1055820(1) + c1.keep_c0_alive = c0 + del c0.loop # now only c1 keeps c0 alive + + c2 = C1055820(2) + c2wr = weakref.ref(c2) # no callback! + + ouch = [] + def callback(ignored): + ouch[:] = [c2wr()] + + # The callback gets associated with a wr on an object in generation 2. + c0wr = weakref.ref(c0, callback) + + c0 = c1 = c2 = None + + # What we've set up: c0, c1, and c2 are all trash now. c0 is in + # generation 2. The only thing keeping it alive is that c1 points to it. + # c1 and c2 are in generation 0, and are in self-loops. There's a global + # weakref to c2 (c2wr), but that weakref has no callback. There's also + # a global weakref to c0 (c0wr), and that does have a callback, and that + # callback references c2 via c2wr(). + # + # c0 has a wr with callback, which references c2wr + # ^ + # | + # | Generation 2 above dots + #. . . . . . . .|. . . . . . . . . . . . . . . . . . . . . . . . + # | Generation 0 below dots + # | + # | + # ^->c1 ^->c2 has a wr but no callback + # | | | | + # <--v <--v + # + # So this is the nightmare: when generation 0 gets collected, we see that + # c2 has a callback-free weakref, and c1 doesn't even have a weakref. + # Collecting generation 0 doesn't see c0 at all, and c0 is the only object + # that has a weakref with a callback. gc clears c1 and c2. Clearing c1 + # has the side effect of dropping the refcount on c0 to 0, so c0 goes + # away (despite that it's in an older generation) and c0's wr callback + # triggers. That in turn materializes a reference to c2 via c2wr(), but + # c2 gets cleared anyway by gc. + + # We want to let gc happen "naturally", to preserve the distinction + # between generations. + junk = [] + i = 0 + detector = GC_Detector() + while not detector.gc_happened: + i += 1 + if i > 10000: + raise TestFailed("gc didn't happen after 10000 iterations") + expect(len(ouch), 0, "bug1055820c") + junk.append([]) # this will eventually trigger gc + + expect(len(ouch), 1, "bug1055820c") # else the callback wasn't invoked + for x in ouch: + # If the callback resurrected c2, the instance would be damaged, + # with an empty __dict__. + expect(x, None, "bug1055820c") + +def test_bug1055820d(): + # Corresponds to temp2d.py in the bug report. This is very much like + # test_bug1055820c, but uses a __del__ method instead of a weakref + # callback to sneak in a resurrection of cyclic trash. + + ouch = [] + class D(C1055820): + def __del__(self): + ouch[:] = [c2wr()] + + d0 = D(0) + # Move all the above into generation 2. + gc.collect() + + c1 = C1055820(1) + c1.keep_d0_alive = d0 + del d0.loop # now only c1 keeps d0 alive + + c2 = C1055820(2) + c2wr = weakref.ref(c2) # no callback! + + d0 = c1 = c2 = None + + # What we've set up: d0, c1, and c2 are all trash now. d0 is in + # generation 2. The only thing keeping it alive is that c1 points to it. + # c1 and c2 are in generation 0, and are in self-loops. There's a global + # weakref to c2 (c2wr), but that weakref has no callback. There are no + # other weakrefs. + # + # d0 has a __del__ method that references c2wr + # ^ + # | + # | Generation 2 above dots + #. . . . . . . .|. . . . . . . . . . . . . . . . . . . . . . . . + # | Generation 0 below dots + # | + # | + # ^->c1 ^->c2 has a wr but no callback + # | | | | + # <--v <--v + # + # So this is the nightmare: when generation 0 gets collected, we see that + # c2 has a callback-free weakref, and c1 doesn't even have a weakref. + # Collecting generation 0 doesn't see d0 at all. gc clears c1 and c2. + # Clearing c1 has the side effect of dropping the refcount on d0 to 0, so + # d0 goes away (despite that it's in an older generation) and d0's __del__ + # triggers. That in turn materializes a reference to c2 via c2wr(), but + # c2 gets cleared anyway by gc. + + # We want to let gc happen "naturally", to preserve the distinction + # between generations. + detector = GC_Detector() + junk = [] + i = 0 + while not detector.gc_happened: + i += 1 + if i > 10000: + raise TestFailed("gc didn't happen after 10000 iterations") + expect(len(ouch), 0, "bug1055820d") + junk.append([]) # this will eventually trigger gc + + expect(len(ouch), 1, "bug1055820d") # else __del__ wasn't invoked + for x in ouch: + # If __del__ resurrected c2, the instance would be damaged, with an + # empty __dict__. + expect(x, None, "bug1055820d") + + def test_all(): gc.collect() # Delete 2nd generation garbage run_test("lists", test_list) @@ -392,6 +578,19 @@ def test_all(): run_test("boom_new", test_boom_new) run_test("boom2_new", test_boom2_new) run_test("get_referents", test_get_referents) + run_test("bug1055820b", test_bug1055820b) + + gc.enable() + try: + run_test("bug1055820c", test_bug1055820c) + finally: + gc.disable() + + gc.enable() + try: + run_test("bug1055820d", test_bug1055820d) + finally: + gc.disable() def test(): if verbose: diff --git a/Misc/NEWS b/Misc/NEWS index 4aba0fc6271..001152a32ee 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -32,6 +32,17 @@ License Version 2. Core and builtins ----------------- +- Bug #1055820 Cyclic garbage collection was not protecting against that + calling a live weakref to a piece of cyclic trash could resurrect an + insane mutation of the trash if any Python code ran during gc (via + running a dead object's __del__ method, running another callback on a + weakref to a dead object, or via any Python code run in any other thread + that managed to obtain the GIL while a __del__ or callback was running + in the thread doing gc). The most likely symptom was "impossible" + ``AttributeEror`` exceptions, appearing seemingly at random, on weakly + referenced objects. The cure was to clear all weakrefs to unreachable + objects before allowing any callbacks to run. + - Bug #1054139 _PyString_Resize() now invalidates its cached hash value. Extension Modules diff --git a/Modules/gc_weakref.txt b/Modules/gc_weakref.txt index b07903bd014..d7be6c3ab42 100644 --- a/Modules/gc_weakref.txt +++ b/Modules/gc_weakref.txt @@ -1,3 +1,79 @@ +Intro +===== + +The basic rule for dealing with weakref callbacks (and __del__ methods too, +for that matter) during cyclic gc: + + Once gc has computed the set of unreachable objects, no Python-level + code can be allowed to access an unreachable object. + +If that can happen, then the Python code can resurrect unreachable objects +too, and gc can't detect that without starting over. Since gc eventually +runs tp_clear on all unreachable objects, if an unreachable object is +resurrected then tp_clear will eventually be called on it (or may already +have been called before resurrection). At best (and this has been an +historically common bug), tp_clear empties an instance's __dict__, and +"impossible" AttributeErrors result. At worst, tp_clear leaves behind an +insane object at the C level, and segfaults result (historically, most +often by setting a new-style class's mro pointer to NULL, after which +attribute lookups performed by the class can segfault). + +OTOH, it's OK to run Python-level code that can't access unreachable +objects, and sometimes that's necessary. The chief example is the callback +attached to a reachable weakref W to an unreachable object O. Since O is +going away, and W is still alive, the callback must be invoked. Because W +is still alive, everything reachable from its callback is also reachable, +so it's also safe to invoke the callback (although that's trickier than it +sounds, since other reachable weakrefs to other unreachable objects may +still exist, and be accessible to the callback -- there are lots of painful +details like this covered in the rest of this file). + +Python 2.4/2.3.5 +================ + +The "Before 2.3.3" section below turned out to be wrong in some ways, but +I'm leaving it as-is because it's more right than wrong, and serves as a +wonderful example of how painful analysis can miss not only the forest for +the trees, but also miss the trees for the aphids sucking the trees +dry . + +The primary thing it missed is that when a weakref to a piece of cyclic +trash (CT) exists, then any call to any Python code whatsoever can end up +materializing a strong reference to that weakref's CT referent, and so +possibly resurrect an insane object (one for which cyclic gc has called-- or +will call before it's done --tp_clear()). It's not even necessarily that a +weakref callback or __del__ method does something nasty on purpose: as +soon as we execute Python code, threads other than the gc thread can run +too, and they can do ordinary things with weakrefs that end up resurrecting +CT while gc is running. + + http://www.python.org/sf/1055820 + +shows how innocent it can be, and also how nasty. Variants of the three +focussed test cases attached to that bug report are now part of Python's +standard Lib/test/test_gc.py. + +Jim Fulton gave the best nutshell summary of the new (in 2.4 and 2.3.5) +approach: + + Clearing cyclic trash can call Python code. If there are weakrefs to + any of the cyclic trash, then those weakrefs can be used to resurrect + the objects. Therefore, *before* clearing cyclic trash, we need to + remove any weakrefs. If any of the weakrefs being removed have + callbacks, then we need to save the callbacks and call them *after* all + of the weakrefs have been cleared. + +Alas, doing just that much doesn't work, because it overlooks what turned +out to be the much subtler problems that were fixed earlier, and described +below. We do clear all weakrefs to CT now before breaking cycles, but not +all callbacks encountered can be run later. That's explained in horrid +detail below. + +Older text follows, with a some later comments in [] brackets: + +Before 2.3.3 +============ + Before 2.3.3, Python's cyclic gc didn't pay any attention to weakrefs. Segfaults in Zope3 resulted. @@ -19,12 +95,19 @@ segfaults) can happen right then, during the callback's execution, or can happen at any later time if the callback manages to resurrect an insane object. +[That missed that, in addition, a weakref to CT can exist outside CT, and + any callback into Python can use such a non-CT weakref to resurrect its CT + referent. The same bad kinds of things can happen then.] + Note that if it's possible for the callback to get at objects in the trash cycles, it must also be the case that the callback itself is part of the trash cycles. Else the callback would have acted as an external root to the current collection, and nothing reachable from it would be in cyclic trash either. +[Except that a non-CT callback can also use a non-CT weakref to get at + CT objects.] + More, if the callback itself is in cyclic trash, then the weakref to which the callback is attached must also be trash, and for the same kind of reason: if the weakref acted as an external root, then the callback could @@ -47,6 +130,13 @@ cyclic trash, it's defensible to first clear weakrefs with callbacks. It's a feature of Python's weakrefs too that when a weakref goes away, the callback (if any) associated with it is thrown away too, unexecuted. +[In 2.4/2.3.5, we first clear all weakrefs to CT objects, whether or not + those weakrefs are themselves CT, and whether or not they have callbacks. + The callbacks (if any) on non-CT weakrefs (if any) are invoked later, + after all weakrefs-to-CT have been cleared. The callbacks (if any) on CT + weakrefs (if any) are never invoked, for the excruciating reasons + explained here.] + Just that much is almost enough to prevent problems, by throwing away *almost* all the weakref callbacks that could get triggered by gc. The problem remaining is that clearing a weakref with a callback decrefs the @@ -56,7 +146,15 @@ weakrefs can trigger callbacks attached to other weakrefs, and those latter weakrefs may or may not be part of cyclic trash. So, to prevent any Python code from running while gc is invoking tp_clear() -on all the objects in cyclic trash, it's not quite enough just to invoke +on all the objects in cyclic trash, + +[That was always wrong: we can't stop Python code from running when gc + is breaking cycles. If an object with a __del__ method is not itself in + a cycle, but is reachable only from CT, then breaking cycles will, as a + matter of course, drop the refcount on that object to 0, and its __del__ + will run right then. What we can and must stop is running any Python + code that could access CT.] + it's not quite enough just to invoke tp_clear() on weakrefs with callbacks first. Instead the weakref module grew a new private function (_PyWeakref_ClearRef) that does only part of tp_clear(): it removes the weakref from the weakly-referenced object's list @@ -65,9 +163,13 @@ _PyWeakref_ClearRef(wr) ensures that wr's callback object will never trigger, and (unlike weakref's tp_clear()) also prevents any callback associated *with* wr's callback object from triggering. +[Although we may trigger such callbacks later, as explained below.] + Then we can call tp_clear on all the cyclic objects and never trigger Python code. +[As above, not so: it means never trigger Python code that can access CT.] + After we do that, the callback objects still need to be decref'ed. Callbacks (if any) *on* the callback objects that were also part of cyclic trash won't get invoked, because we cleared all trash weakrefs with callbacks at the @@ -76,6 +178,10 @@ acted as external roots to everything reachable from them, so nothing reachable from them was part of cyclic trash, so gc didn't do any damage to objects reachable from them, and it's safe to call them at the end of gc. +[That's so. In addition, now we also invoke (if any) the callbacks on + non-CT weakrefs to CT objects, during the same pass that decrefs the + callback objects.] + An alternative would have been to treat objects with callbacks like objects with __del__ methods, refusing to collect them, appending them to gc.garbage instead. That would have been much easier. Jim Fulton gave a strong @@ -105,3 +211,9 @@ __del__ methods is to avoid running finalizers in an arbitrary order). However, a weakref callback on a weakref callback has got to be rare. It's possible to do such a thing, so gc has to be robust against it, but I doubt anyone has done it outside the test case I wrote for it. + +[The callbacks (if any) on non-CT weakrefs to CT objects are also executed + in an arbitrary order now. But they were before too, depending on the + vagaries of when tp_clear() happened to break enough cycles to trigger + them. People simply shouldn't try to use __del__ or weakref callbacks to + do fancy stuff.] diff --git a/Modules/gcmodule.c b/Modules/gcmodule.c index caf749999a7..af60647c019 100644 --- a/Modules/gcmodule.c +++ b/Modules/gcmodule.c @@ -396,38 +396,30 @@ has_finalizer(PyObject *op) return 0; } -/* Move the objects in unreachable with __del__ methods into finalizers, - * and weakrefs with callbacks into wr_callbacks. - * The objects remaining in unreachable do not have __del__ methods, and are - * not weakrefs with callbacks. - * The objects moved have gc_refs changed to GC_REACHABLE; the objects - * remaining in unreachable are left at GC_TENTATIVELY_UNREACHABLE. +/* Move the objects in unreachable with __del__ methods into `finalizers`. + * Objects moved into `finalizers` have gc_refs set to GC_REACHABLE; the + * objects remaining in unreachable are left at GC_TENTATIVELY_UNREACHABLE. */ static void -move_troublemakers(PyGC_Head *unreachable, - PyGC_Head *finalizers, - PyGC_Head *wr_callbacks) +move_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers) { - PyGC_Head *gc = unreachable->gc.gc_next; + PyGC_Head *gc; + PyGC_Head *next; - while (gc != unreachable) { + /* March over unreachable. Move objects with finalizers into + * `finalizers`. + */ + for (gc = unreachable->gc.gc_next; gc != unreachable; gc = next) { PyObject *op = FROM_GC(gc); - PyGC_Head *next = gc->gc.gc_next; assert(IS_TENTATIVELY_UNREACHABLE(op)); + next = gc->gc.gc_next; if (has_finalizer(op)) { gc_list_remove(gc); gc_list_append(gc, finalizers); gc->gc.gc_refs = GC_REACHABLE; } - else if (PyWeakref_Check(op) && - ((PyWeakReference *)op)->wr_callback) { - gc_list_remove(gc); - gc_list_append(gc, wr_callbacks); - gc->gc.gc_refs = GC_REACHABLE; - } - gc = next; } } @@ -463,82 +455,144 @@ move_finalizer_reachable(PyGC_Head *finalizers) } } -/* Clear all trash weakrefs with callbacks. This clears weakrefs first, - * which has the happy result of disabling the callbacks without executing - * them. A nasty technical complication: a weakref callback can itself be - * the target of a weakref, in which case decrefing the callback can cause - * another callback to trigger. But we can't allow arbitrary Python code to - * get executed at this point (the callback on the callback may try to muck - * with other cyclic trash we're trying to collect, even resurrecting it - * while we're in the middle of doing tp_clear() on the trash). - * - * The private _PyWeakref_ClearRef() function exists so that we can clear - * the reference in a weakref without triggering a callback on the callback. - * - * We have to save the callback objects and decref them later. But we can't - * allocate new memory to save them (if we can't get new memory, we're dead). - * So we grab a new reference on the clear'ed weakref, which prevents the - * rest of gc from reclaiming it. _PyWeakref_ClearRef() leaves the - * weakref's wr_callback member intact. - * - * In the end, then, wr_callbacks consists of cleared weakrefs that are - * immune from collection. Near the end of gc, after collecting all the - * cyclic trash, we call release_weakrefs(). That releases our references - * to the cleared weakrefs, which in turn may trigger callbacks on their - * callbacks. - */ -static void -clear_weakrefs(PyGC_Head *wr_callbacks) -{ - PyGC_Head *gc = wr_callbacks->gc.gc_next; - - for (; gc != wr_callbacks; gc = gc->gc.gc_next) { - PyObject *op = FROM_GC(gc); - PyWeakReference *wr; - - assert(IS_REACHABLE(op)); - assert(PyWeakref_Check(op)); - wr = (PyWeakReference *)op; - assert(wr->wr_callback != NULL); - Py_INCREF(op); - _PyWeakref_ClearRef(wr); - } -} - -/* Called near the end of gc. This gives up the references we own to - * cleared weakrefs, allowing them to get collected, and in turn decref'ing - * their callbacks. - * - * If a callback object is itself the target of a weakref callback, - * decref'ing the callback object may trigger that other callback. If - * that other callback was part of the cyclic trash in this generation, - * that won't happen, since we cleared *all* trash-weakref callbacks near - * the start of gc. If that other callback was not part of the cyclic trash - * in this generation, then it acted like an external root to this round - * of gc, so all the objects reachable from that callback are still alive. - * - * Giving up the references to the weakref objects will probably make - * them go away too. However, if a weakref is reachable from finalizers, - * it won't go away. We move it to the old generation then. Since a - * weakref object doesn't have a finalizer, that's the right thing to do (it - * doesn't belong in gc.garbage). - * - * We return the number of weakref objects freed (those not appended to old). +/* Clear all weakrefs to unreachable objects, and if such a weakref has a + * callback, invoke it if necessary. Note that it's possible for such + * weakrefs to be outside the unreachable set -- indeed, those are precisely + * the weakrefs whose callbacks must be invoked. See gc_weakref.txt for + * overview & some details. Some weakrefs with callbacks may be reclaimed + * directly by this routine; the number reclaimed is the return value. Other + * weakrefs with callbacks may be moved into the `old` generation. Objects + * moved into `old` have gc_refs set to GC_REACHABLE; the objects remaining in + * unreachable are left at GC_TENTATIVELY_UNREACHABLE. When this returns, + * no object in `unreachable` is weakly referenced anymore. */ static int -release_weakrefs(PyGC_Head *wr_callbacks, PyGC_Head *old) +handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old) { + PyGC_Head *gc; + PyObject *op; /* generally FROM_GC(gc) */ + PyWeakReference *wr; /* generally a cast of op */ + + PyGC_Head wrcb_to_call; /* weakrefs with callbacks to call */ + PyGC_Head wrcb_to_kill; /* weakrefs with callbacks to ignore */ + + PyGC_Head *next; int num_freed = 0; - while (! gc_list_is_empty(wr_callbacks)) { - PyGC_Head *gc = wr_callbacks->gc.gc_next; - PyObject *op = FROM_GC(gc); + gc_list_init(&wrcb_to_call); + gc_list_init(&wrcb_to_kill); + /* Clear all weakrefs to the objects in unreachable. If such a weakref + * also has a callback, move it into `wrcb_to_call` if the callback + * needs to be invoked, or into `wrcb_to_kill` if the callback should + * be ignored. Note that we cannot invoke any callbacks until all + * weakrefs to unreachable objects are cleared, lest the callback + * resurrect an unreachable object via a still-active weakref. That's + * why the weakrefs with callbacks are moved into different lists -- we + * make another pass over those lists after this pass completes. + */ + for (gc = unreachable->gc.gc_next; gc != unreachable; gc = next) { + PyWeakReference **wrlist; + + op = FROM_GC(gc); + assert(IS_TENTATIVELY_UNREACHABLE(op)); + next = gc->gc.gc_next; + + if (! PyType_SUPPORTS_WEAKREFS(op->ob_type)) + continue; + + /* It supports weakrefs. Does it have any? */ + wrlist = (PyWeakReference **) + PyObject_GET_WEAKREFS_LISTPTR(op); + + /* `op` may have some weakrefs. March over the list, clear + * all the weakrefs, and move the weakrefs with callbacks + * into one of the wrcb_to_{call,kill} lists. + */ + for (wr = *wrlist; wr != NULL; wr = *wrlist) { + PyGC_Head *wrasgc; /* AS_GC(wr) */ + + /* _PyWeakref_ClearRef clears the weakref but leaves + * the callback pointer intact. Obscure: it also + * changes *wrlist. + */ + assert(wr->wr_object == op); + _PyWeakref_ClearRef(wr); + assert(wr->wr_object == Py_None); + if (wr->wr_callback == NULL) + continue; /* no callback */ + + /* Headache time. `op` is going away, and is weakly referenced by + * `wr`, which has a callback. Should the callback be invoked? If wr + * is also trash, no: + * + * 1. There's no need to call it. The object and the weakref are + * both going away, so it's legitimate to pretend the weakref is + * going away first. The user has to ensure a weakref outlives its + * referent if they want a guarantee that the wr callback will get + * invoked. + * + * 2. It may be catastrophic to call it. If the callback is also in + * cyclic trash (CT), then although the CT is unreachable from + * outside the current generation, CT may be reachable from the + * callback. Then the callback could resurrect insane objects. + * + * Since the callback is never needed and may be unsafe in this case, + * wr is moved to wrcb_to_kill. + * + * OTOH, if wr isn't part of CT, we should invoke the callback: the + * weakref outlived the trash. Note that since wr isn't CT in this + * case, its callback can't be CT either -- wr acted as an external + * root to this generation, and therefore its callback did too. So + * nothing in CT is reachable from the callback either, so it's hard + * to imagine how calling it later could create a problem for us. wr + * is moved to wrcb_to_call in this case. + * + * Obscure: why do we move weakrefs with ignored callbacks to a list + * we crawl over later, instead of getting rid of the callback right + * now? It's because the obvious way doesn't work: setting + * wr->wr_callback to NULL requires that we decref the current + * wr->wr_callback. But callbacks are also weakly referenceable, so + * wr->wr_callback may *itself* be referenced by another weakref with + * another callback. The latter callback could get triggered now if + * we decref'ed now, but as explained before it's potentially unsafe to + * invoke any callback before all weakrefs to CT are cleared. + */ + /* Create a new reference so that wr can't go away + * before we can process it again. + */ + Py_INCREF(wr); + + /* Move wr to the appropriate list. */ + wrasgc = AS_GC(wr); + if (wrasgc == next) + next = wrasgc->gc.gc_next; + gc_list_remove(wrasgc); + gc_list_append(wrasgc, + IS_TENTATIVELY_UNREACHABLE(wr) ? + &wrcb_to_kill : &wrcb_to_call); + wrasgc->gc.gc_refs = GC_REACHABLE; + } + } + + /* Now that all weakrefs to trash have been cleared, it's safe to + * decref the callbacks we decided to ignore. We cannot invoke + * them because they may be able to resurrect unreachable (from + * outside) objects. + */ + while (! gc_list_is_empty(&wrcb_to_kill)) { + gc = wrcb_to_kill.gc.gc_next; + op = FROM_GC(gc); assert(IS_REACHABLE(op)); assert(PyWeakref_Check(op)); assert(((PyWeakReference *)op)->wr_callback != NULL); + + /* Give up the reference we created in the first pass. When + * op's refcount hits 0 (which it may or may not do right now), + * op's tp_dealloc will decref op->wr_callback too. + */ Py_DECREF(op); - if (wr_callbacks->gc.gc_next == gc) { + if (wrcb_to_kill.gc.gc_next == gc) { /* object is still alive -- move it */ gc_list_remove(gc); gc_list_append(gc, old); @@ -546,6 +600,43 @@ release_weakrefs(PyGC_Head *wr_callbacks, PyGC_Head *old) else ++num_freed; } + + /* Finally, invoke the callbacks we decided to honor. It's safe to + * invoke them because they cannot reference objects in `unreachable`. + */ + while (! gc_list_is_empty(&wrcb_to_call)) { + PyObject *temp; + PyObject *callback; + + gc = wrcb_to_call.gc.gc_next; + op = FROM_GC(gc); + assert(IS_REACHABLE(op)); + assert(PyWeakref_Check(op)); + wr = (PyWeakReference *)op; + callback = wr->wr_callback; + assert(callback != NULL); + + /* copy-paste of weakrefobject.c's handle_callback() */ + temp = PyObject_CallFunction(callback, "O", wr); + if (temp == NULL) + PyErr_WriteUnraisable(callback); + else + Py_DECREF(temp); + + /* Give up the reference we created in the first pass. When + * op's refcount hits 0 (which it may or may not do right now), + * op's tp_dealloc will decref op->wr_callback too. + */ + Py_DECREF(op); + if (wrcb_to_call.gc.gc_next == gc) { + /* object is still alive -- move it */ + gc_list_remove(gc); + gc_list_append(gc, old); + } + else + ++num_freed; + } + return num_freed; } @@ -652,7 +743,6 @@ collect(int generation) PyGC_Head *old; /* next older generation */ PyGC_Head unreachable; /* non-problematic unreachable trash */ PyGC_Head finalizers; /* objects with, & reachable from, __del__ */ - PyGC_Head wr_callbacks; /* weakrefs with callbacks */ PyGC_Head *gc; if (delstr == NULL) { @@ -690,9 +780,9 @@ collect(int generation) old = young; /* Using ob_refcnt and gc_refs, calculate which objects in the - * container set are reachable from outside the set (ie. have a + * container set are reachable from outside the set (i.e., have a * refcount greater than 0 when all the references within the - * set are taken into account + * set are taken into account). */ update_refs(young); subtract_refs(young); @@ -714,23 +804,11 @@ collect(int generation) * finalizers can't safely be deleted. Python programmers should take * care not to create such things. For Python, finalizers means * instance objects with __del__ methods. Weakrefs with callbacks - * can call arbitrary Python code, so those are special-cased too. - * - * Move unreachable objects with finalizers, and weakrefs with - * callbacks, into different lists. + * can also call arbitrary Python code but they will be dealt with by + * handle_weakrefs(). */ gc_list_init(&finalizers); - gc_list_init(&wr_callbacks); - move_troublemakers(&unreachable, &finalizers, &wr_callbacks); - /* Clear the trash weakrefs with callbacks. This prevents their - * callbacks from getting invoked (when a weakref goes away, so does - * its callback). - * We do this even if the weakrefs are reachable from finalizers. - * If we didn't, breaking cycles in unreachable later could trigger - * deallocation of objects in finalizers, which could in turn - * cause callbacks to trigger. This may not be ideal behavior. - */ - clear_weakrefs(&wr_callbacks); + move_finalizers(&unreachable, &finalizers); /* finalizers contains the unreachable objects with a finalizer; * unreachable objects reachable *from* those are also uncollectable, * and we move those into the finalizers list too. @@ -747,17 +825,16 @@ collect(int generation) debug_cycle("collectable", FROM_GC(gc)); } } + + /* Clear weakrefs and invoke callbacks as necessary. */ + m += handle_weakrefs(&unreachable, old); + /* Call tp_clear on objects in the unreachable set. This will cause * the reference cycles to be broken. It may also cause some objects * in finalizers to be freed. */ delete_garbage(&unreachable, old); - /* Now that we're done analyzing stuff and breaking cycles, let - * delayed weakref callbacks run. - */ - m += release_weakrefs(&wr_callbacks, old); - /* Collect statistics on uncollectable objects found and print * debugging information. */ for (gc = finalizers.gc.gc_next; diff --git a/Objects/weakrefobject.c b/Objects/weakrefobject.c index 6da8192da24..02370c4a7ae 100644 --- a/Objects/weakrefobject.c +++ b/Objects/weakrefobject.c @@ -850,7 +850,9 @@ PyWeakref_GetObject(PyObject *ref) return PyWeakref_GET_OBJECT(ref); } - +/* Note that there's an inlined copy-paste of handle_callback() in gcmodule.c's + * handle_weakrefs(). + */ static void handle_callback(PyWeakReference *ref, PyObject *callback) {