SF 1055820: weakref callback vs gc vs threads
In cyclic gc, clear weakrefs to unreachable objects before allowing any Python code (weakref callbacks or __del__ methods) to run. This is a critical bugfix, affecting all versions of Python since weakrefs were introduced. I'll backport to 2.3.
This commit is contained in:
parent
d7bcf4deb1
commit
ead8b7ab30
|
@ -9,11 +9,31 @@ extern "C" {
|
|||
|
||||
typedef struct _PyWeakReference PyWeakReference;
|
||||
|
||||
/* PyWeakReference is the base struct for the Python ReferenceType, ProxyType,
|
||||
* and CallableProxyType.
|
||||
*/
|
||||
struct _PyWeakReference {
|
||||
PyObject_HEAD
|
||||
|
||||
/* The object to which this is a weak reference, or Py_None if none.
|
||||
* Note that this is a stealth reference: wr_object's refcount is
|
||||
* not incremented to reflect this pointer.
|
||||
*/
|
||||
PyObject *wr_object;
|
||||
|
||||
/* A callable to invoke when wr_object dies, or NULL if none. */
|
||||
PyObject *wr_callback;
|
||||
|
||||
/* A cache for wr_object's hash code. As usual for hashes, this is -1
|
||||
* if the hash code isn't known yet.
|
||||
*/
|
||||
long hash;
|
||||
|
||||
/* If wr_object is weakly referenced, wr_object has a doubly-linked NULL-
|
||||
* terminated list of weak references to it. These are the list pointers.
|
||||
* If wr_object goes away, wr_object is set to Py_None, and these pointers
|
||||
* have no meaning then.
|
||||
*/
|
||||
PyWeakReference *wr_prev;
|
||||
PyWeakReference *wr_next;
|
||||
};
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
from test.test_support import verify, verbose, TestFailed, vereq
|
||||
import sys
|
||||
import gc
|
||||
import weakref
|
||||
|
||||
def expect(actual, expected, name):
|
||||
if actual != expected:
|
||||
|
@ -369,6 +370,191 @@ def test_get_referents():
|
|||
|
||||
expect(gc.get_referents(1, 'a', 4j), [], "get_referents")
|
||||
|
||||
# Bug 1055820 has several tests of longstanding bugs involving weakrefs and
|
||||
# cyclic gc.
|
||||
|
||||
# An instance of C1055820 has a self-loop, so becomes cyclic trash when
|
||||
# unreachable.
|
||||
class C1055820(object):
|
||||
def __init__(self, i):
|
||||
self.i = i
|
||||
self.loop = self
|
||||
|
||||
class GC_Detector(object):
|
||||
# Create an instance I. Then gc hasn't happened again so long as
|
||||
# I.gc_happened is false.
|
||||
|
||||
def __init__(self):
|
||||
self.gc_happened = False
|
||||
|
||||
def it_happened(ignored):
|
||||
self.gc_happened = True
|
||||
|
||||
# Create a piece of cyclic trash that triggers it_happened when
|
||||
# gc collects it.
|
||||
self.wr = weakref.ref(C1055820(666), it_happened)
|
||||
|
||||
def test_bug1055820b():
|
||||
# Corresponds to temp2b.py in the bug report.
|
||||
|
||||
ouch = []
|
||||
def callback(ignored):
|
||||
ouch[:] = [wr() for wr in WRs]
|
||||
|
||||
Cs = [C1055820(i) for i in range(2)]
|
||||
WRs = [weakref.ref(c, callback) for c in Cs]
|
||||
c = None
|
||||
|
||||
gc.collect()
|
||||
expect(len(ouch), 0, "bug1055820b")
|
||||
# Make the two instances trash, and collect again. The bug was that
|
||||
# the callback materialized a strong reference to an instance, but gc
|
||||
# cleared the instance's dict anyway.
|
||||
Cs = None
|
||||
gc.collect()
|
||||
expect(len(ouch), 2, "bug1055820b") # else the callbacks didn't run
|
||||
for x in ouch:
|
||||
# If the callback resurrected one of these guys, the instance
|
||||
# would be damaged, with an empty __dict__.
|
||||
expect(x, None, "bug1055820b")
|
||||
|
||||
def test_bug1055820c():
|
||||
# Corresponds to temp2c.py in the bug report. This is pretty elaborate.
|
||||
|
||||
c0 = C1055820(0)
|
||||
# Move c0 into generation 2.
|
||||
gc.collect()
|
||||
|
||||
c1 = C1055820(1)
|
||||
c1.keep_c0_alive = c0
|
||||
del c0.loop # now only c1 keeps c0 alive
|
||||
|
||||
c2 = C1055820(2)
|
||||
c2wr = weakref.ref(c2) # no callback!
|
||||
|
||||
ouch = []
|
||||
def callback(ignored):
|
||||
ouch[:] = [c2wr()]
|
||||
|
||||
# The callback gets associated with a wr on an object in generation 2.
|
||||
c0wr = weakref.ref(c0, callback)
|
||||
|
||||
c0 = c1 = c2 = None
|
||||
|
||||
# What we've set up: c0, c1, and c2 are all trash now. c0 is in
|
||||
# generation 2. The only thing keeping it alive is that c1 points to it.
|
||||
# c1 and c2 are in generation 0, and are in self-loops. There's a global
|
||||
# weakref to c2 (c2wr), but that weakref has no callback. There's also
|
||||
# a global weakref to c0 (c0wr), and that does have a callback, and that
|
||||
# callback references c2 via c2wr().
|
||||
#
|
||||
# c0 has a wr with callback, which references c2wr
|
||||
# ^
|
||||
# |
|
||||
# | Generation 2 above dots
|
||||
#. . . . . . . .|. . . . . . . . . . . . . . . . . . . . . . . .
|
||||
# | Generation 0 below dots
|
||||
# |
|
||||
# |
|
||||
# ^->c1 ^->c2 has a wr but no callback
|
||||
# | | | |
|
||||
# <--v <--v
|
||||
#
|
||||
# So this is the nightmare: when generation 0 gets collected, we see that
|
||||
# c2 has a callback-free weakref, and c1 doesn't even have a weakref.
|
||||
# Collecting generation 0 doesn't see c0 at all, and c0 is the only object
|
||||
# that has a weakref with a callback. gc clears c1 and c2. Clearing c1
|
||||
# has the side effect of dropping the refcount on c0 to 0, so c0 goes
|
||||
# away (despite that it's in an older generation) and c0's wr callback
|
||||
# triggers. That in turn materializes a reference to c2 via c2wr(), but
|
||||
# c2 gets cleared anyway by gc.
|
||||
|
||||
# We want to let gc happen "naturally", to preserve the distinction
|
||||
# between generations.
|
||||
junk = []
|
||||
i = 0
|
||||
detector = GC_Detector()
|
||||
while not detector.gc_happened:
|
||||
i += 1
|
||||
if i > 10000:
|
||||
raise TestFailed("gc didn't happen after 10000 iterations")
|
||||
expect(len(ouch), 0, "bug1055820c")
|
||||
junk.append([]) # this will eventually trigger gc
|
||||
|
||||
expect(len(ouch), 1, "bug1055820c") # else the callback wasn't invoked
|
||||
for x in ouch:
|
||||
# If the callback resurrected c2, the instance would be damaged,
|
||||
# with an empty __dict__.
|
||||
expect(x, None, "bug1055820c")
|
||||
|
||||
def test_bug1055820d():
|
||||
# Corresponds to temp2d.py in the bug report. This is very much like
|
||||
# test_bug1055820c, but uses a __del__ method instead of a weakref
|
||||
# callback to sneak in a resurrection of cyclic trash.
|
||||
|
||||
ouch = []
|
||||
class D(C1055820):
|
||||
def __del__(self):
|
||||
ouch[:] = [c2wr()]
|
||||
|
||||
d0 = D(0)
|
||||
# Move all the above into generation 2.
|
||||
gc.collect()
|
||||
|
||||
c1 = C1055820(1)
|
||||
c1.keep_d0_alive = d0
|
||||
del d0.loop # now only c1 keeps d0 alive
|
||||
|
||||
c2 = C1055820(2)
|
||||
c2wr = weakref.ref(c2) # no callback!
|
||||
|
||||
d0 = c1 = c2 = None
|
||||
|
||||
# What we've set up: d0, c1, and c2 are all trash now. d0 is in
|
||||
# generation 2. The only thing keeping it alive is that c1 points to it.
|
||||
# c1 and c2 are in generation 0, and are in self-loops. There's a global
|
||||
# weakref to c2 (c2wr), but that weakref has no callback. There are no
|
||||
# other weakrefs.
|
||||
#
|
||||
# d0 has a __del__ method that references c2wr
|
||||
# ^
|
||||
# |
|
||||
# | Generation 2 above dots
|
||||
#. . . . . . . .|. . . . . . . . . . . . . . . . . . . . . . . .
|
||||
# | Generation 0 below dots
|
||||
# |
|
||||
# |
|
||||
# ^->c1 ^->c2 has a wr but no callback
|
||||
# | | | |
|
||||
# <--v <--v
|
||||
#
|
||||
# So this is the nightmare: when generation 0 gets collected, we see that
|
||||
# c2 has a callback-free weakref, and c1 doesn't even have a weakref.
|
||||
# Collecting generation 0 doesn't see d0 at all. gc clears c1 and c2.
|
||||
# Clearing c1 has the side effect of dropping the refcount on d0 to 0, so
|
||||
# d0 goes away (despite that it's in an older generation) and d0's __del__
|
||||
# triggers. That in turn materializes a reference to c2 via c2wr(), but
|
||||
# c2 gets cleared anyway by gc.
|
||||
|
||||
# We want to let gc happen "naturally", to preserve the distinction
|
||||
# between generations.
|
||||
detector = GC_Detector()
|
||||
junk = []
|
||||
i = 0
|
||||
while not detector.gc_happened:
|
||||
i += 1
|
||||
if i > 10000:
|
||||
raise TestFailed("gc didn't happen after 10000 iterations")
|
||||
expect(len(ouch), 0, "bug1055820d")
|
||||
junk.append([]) # this will eventually trigger gc
|
||||
|
||||
expect(len(ouch), 1, "bug1055820d") # else __del__ wasn't invoked
|
||||
for x in ouch:
|
||||
# If __del__ resurrected c2, the instance would be damaged, with an
|
||||
# empty __dict__.
|
||||
expect(x, None, "bug1055820d")
|
||||
|
||||
|
||||
def test_all():
|
||||
gc.collect() # Delete 2nd generation garbage
|
||||
run_test("lists", test_list)
|
||||
|
@ -392,6 +578,19 @@ def test_all():
|
|||
run_test("boom_new", test_boom_new)
|
||||
run_test("boom2_new", test_boom2_new)
|
||||
run_test("get_referents", test_get_referents)
|
||||
run_test("bug1055820b", test_bug1055820b)
|
||||
|
||||
gc.enable()
|
||||
try:
|
||||
run_test("bug1055820c", test_bug1055820c)
|
||||
finally:
|
||||
gc.disable()
|
||||
|
||||
gc.enable()
|
||||
try:
|
||||
run_test("bug1055820d", test_bug1055820d)
|
||||
finally:
|
||||
gc.disable()
|
||||
|
||||
def test():
|
||||
if verbose:
|
||||
|
|
11
Misc/NEWS
11
Misc/NEWS
|
@ -32,6 +32,17 @@ License Version 2.
|
|||
Core and builtins
|
||||
-----------------
|
||||
|
||||
- Bug #1055820 Cyclic garbage collection was not protecting against that
|
||||
calling a live weakref to a piece of cyclic trash could resurrect an
|
||||
insane mutation of the trash if any Python code ran during gc (via
|
||||
running a dead object's __del__ method, running another callback on a
|
||||
weakref to a dead object, or via any Python code run in any other thread
|
||||
that managed to obtain the GIL while a __del__ or callback was running
|
||||
in the thread doing gc). The most likely symptom was "impossible"
|
||||
``AttributeEror`` exceptions, appearing seemingly at random, on weakly
|
||||
referenced objects. The cure was to clear all weakrefs to unreachable
|
||||
objects before allowing any callbacks to run.
|
||||
|
||||
- Bug #1054139 _PyString_Resize() now invalidates its cached hash value.
|
||||
|
||||
Extension Modules
|
||||
|
|
|
@ -1,3 +1,79 @@
|
|||
Intro
|
||||
=====
|
||||
|
||||
The basic rule for dealing with weakref callbacks (and __del__ methods too,
|
||||
for that matter) during cyclic gc:
|
||||
|
||||
Once gc has computed the set of unreachable objects, no Python-level
|
||||
code can be allowed to access an unreachable object.
|
||||
|
||||
If that can happen, then the Python code can resurrect unreachable objects
|
||||
too, and gc can't detect that without starting over. Since gc eventually
|
||||
runs tp_clear on all unreachable objects, if an unreachable object is
|
||||
resurrected then tp_clear will eventually be called on it (or may already
|
||||
have been called before resurrection). At best (and this has been an
|
||||
historically common bug), tp_clear empties an instance's __dict__, and
|
||||
"impossible" AttributeErrors result. At worst, tp_clear leaves behind an
|
||||
insane object at the C level, and segfaults result (historically, most
|
||||
often by setting a new-style class's mro pointer to NULL, after which
|
||||
attribute lookups performed by the class can segfault).
|
||||
|
||||
OTOH, it's OK to run Python-level code that can't access unreachable
|
||||
objects, and sometimes that's necessary. The chief example is the callback
|
||||
attached to a reachable weakref W to an unreachable object O. Since O is
|
||||
going away, and W is still alive, the callback must be invoked. Because W
|
||||
is still alive, everything reachable from its callback is also reachable,
|
||||
so it's also safe to invoke the callback (although that's trickier than it
|
||||
sounds, since other reachable weakrefs to other unreachable objects may
|
||||
still exist, and be accessible to the callback -- there are lots of painful
|
||||
details like this covered in the rest of this file).
|
||||
|
||||
Python 2.4/2.3.5
|
||||
================
|
||||
|
||||
The "Before 2.3.3" section below turned out to be wrong in some ways, but
|
||||
I'm leaving it as-is because it's more right than wrong, and serves as a
|
||||
wonderful example of how painful analysis can miss not only the forest for
|
||||
the trees, but also miss the trees for the aphids sucking the trees
|
||||
dry <wink>.
|
||||
|
||||
The primary thing it missed is that when a weakref to a piece of cyclic
|
||||
trash (CT) exists, then any call to any Python code whatsoever can end up
|
||||
materializing a strong reference to that weakref's CT referent, and so
|
||||
possibly resurrect an insane object (one for which cyclic gc has called-- or
|
||||
will call before it's done --tp_clear()). It's not even necessarily that a
|
||||
weakref callback or __del__ method does something nasty on purpose: as
|
||||
soon as we execute Python code, threads other than the gc thread can run
|
||||
too, and they can do ordinary things with weakrefs that end up resurrecting
|
||||
CT while gc is running.
|
||||
|
||||
http://www.python.org/sf/1055820
|
||||
|
||||
shows how innocent it can be, and also how nasty. Variants of the three
|
||||
focussed test cases attached to that bug report are now part of Python's
|
||||
standard Lib/test/test_gc.py.
|
||||
|
||||
Jim Fulton gave the best nutshell summary of the new (in 2.4 and 2.3.5)
|
||||
approach:
|
||||
|
||||
Clearing cyclic trash can call Python code. If there are weakrefs to
|
||||
any of the cyclic trash, then those weakrefs can be used to resurrect
|
||||
the objects. Therefore, *before* clearing cyclic trash, we need to
|
||||
remove any weakrefs. If any of the weakrefs being removed have
|
||||
callbacks, then we need to save the callbacks and call them *after* all
|
||||
of the weakrefs have been cleared.
|
||||
|
||||
Alas, doing just that much doesn't work, because it overlooks what turned
|
||||
out to be the much subtler problems that were fixed earlier, and described
|
||||
below. We do clear all weakrefs to CT now before breaking cycles, but not
|
||||
all callbacks encountered can be run later. That's explained in horrid
|
||||
detail below.
|
||||
|
||||
Older text follows, with a some later comments in [] brackets:
|
||||
|
||||
Before 2.3.3
|
||||
============
|
||||
|
||||
Before 2.3.3, Python's cyclic gc didn't pay any attention to weakrefs.
|
||||
Segfaults in Zope3 resulted.
|
||||
|
||||
|
@ -19,12 +95,19 @@ segfaults) can happen right then, during the callback's execution, or can
|
|||
happen at any later time if the callback manages to resurrect an insane
|
||||
object.
|
||||
|
||||
[That missed that, in addition, a weakref to CT can exist outside CT, and
|
||||
any callback into Python can use such a non-CT weakref to resurrect its CT
|
||||
referent. The same bad kinds of things can happen then.]
|
||||
|
||||
Note that if it's possible for the callback to get at objects in the trash
|
||||
cycles, it must also be the case that the callback itself is part of the
|
||||
trash cycles. Else the callback would have acted as an external root to
|
||||
the current collection, and nothing reachable from it would be in cyclic
|
||||
trash either.
|
||||
|
||||
[Except that a non-CT callback can also use a non-CT weakref to get at
|
||||
CT objects.]
|
||||
|
||||
More, if the callback itself is in cyclic trash, then the weakref to which
|
||||
the callback is attached must also be trash, and for the same kind of
|
||||
reason: if the weakref acted as an external root, then the callback could
|
||||
|
@ -47,6 +130,13 @@ cyclic trash, it's defensible to first clear weakrefs with callbacks. It's
|
|||
a feature of Python's weakrefs too that when a weakref goes away, the
|
||||
callback (if any) associated with it is thrown away too, unexecuted.
|
||||
|
||||
[In 2.4/2.3.5, we first clear all weakrefs to CT objects, whether or not
|
||||
those weakrefs are themselves CT, and whether or not they have callbacks.
|
||||
The callbacks (if any) on non-CT weakrefs (if any) are invoked later,
|
||||
after all weakrefs-to-CT have been cleared. The callbacks (if any) on CT
|
||||
weakrefs (if any) are never invoked, for the excruciating reasons
|
||||
explained here.]
|
||||
|
||||
Just that much is almost enough to prevent problems, by throwing away
|
||||
*almost* all the weakref callbacks that could get triggered by gc. The
|
||||
problem remaining is that clearing a weakref with a callback decrefs the
|
||||
|
@ -56,7 +146,15 @@ weakrefs can trigger callbacks attached to other weakrefs, and those
|
|||
latter weakrefs may or may not be part of cyclic trash.
|
||||
|
||||
So, to prevent any Python code from running while gc is invoking tp_clear()
|
||||
on all the objects in cyclic trash, it's not quite enough just to invoke
|
||||
on all the objects in cyclic trash,
|
||||
|
||||
[That was always wrong: we can't stop Python code from running when gc
|
||||
is breaking cycles. If an object with a __del__ method is not itself in
|
||||
a cycle, but is reachable only from CT, then breaking cycles will, as a
|
||||
matter of course, drop the refcount on that object to 0, and its __del__
|
||||
will run right then. What we can and must stop is running any Python
|
||||
code that could access CT.]
|
||||
it's not quite enough just to invoke
|
||||
tp_clear() on weakrefs with callbacks first. Instead the weakref module
|
||||
grew a new private function (_PyWeakref_ClearRef) that does only part of
|
||||
tp_clear(): it removes the weakref from the weakly-referenced object's list
|
||||
|
@ -65,9 +163,13 @@ _PyWeakref_ClearRef(wr) ensures that wr's callback object will never
|
|||
trigger, and (unlike weakref's tp_clear()) also prevents any callback
|
||||
associated *with* wr's callback object from triggering.
|
||||
|
||||
[Although we may trigger such callbacks later, as explained below.]
|
||||
|
||||
Then we can call tp_clear on all the cyclic objects and never trigger
|
||||
Python code.
|
||||
|
||||
[As above, not so: it means never trigger Python code that can access CT.]
|
||||
|
||||
After we do that, the callback objects still need to be decref'ed. Callbacks
|
||||
(if any) *on* the callback objects that were also part of cyclic trash won't
|
||||
get invoked, because we cleared all trash weakrefs with callbacks at the
|
||||
|
@ -76,6 +178,10 @@ acted as external roots to everything reachable from them, so nothing
|
|||
reachable from them was part of cyclic trash, so gc didn't do any damage to
|
||||
objects reachable from them, and it's safe to call them at the end of gc.
|
||||
|
||||
[That's so. In addition, now we also invoke (if any) the callbacks on
|
||||
non-CT weakrefs to CT objects, during the same pass that decrefs the
|
||||
callback objects.]
|
||||
|
||||
An alternative would have been to treat objects with callbacks like objects
|
||||
with __del__ methods, refusing to collect them, appending them to gc.garbage
|
||||
instead. That would have been much easier. Jim Fulton gave a strong
|
||||
|
@ -105,3 +211,9 @@ __del__ methods is to avoid running finalizers in an arbitrary order).
|
|||
However, a weakref callback on a weakref callback has got to be rare.
|
||||
It's possible to do such a thing, so gc has to be robust against it, but
|
||||
I doubt anyone has done it outside the test case I wrote for it.
|
||||
|
||||
[The callbacks (if any) on non-CT weakrefs to CT objects are also executed
|
||||
in an arbitrary order now. But they were before too, depending on the
|
||||
vagaries of when tp_clear() happened to break enough cycles to trigger
|
||||
them. People simply shouldn't try to use __del__ or weakref callbacks to
|
||||
do fancy stuff.]
|
||||
|
|
|
@ -396,38 +396,30 @@ has_finalizer(PyObject *op)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* Move the objects in unreachable with __del__ methods into finalizers,
|
||||
* and weakrefs with callbacks into wr_callbacks.
|
||||
* The objects remaining in unreachable do not have __del__ methods, and are
|
||||
* not weakrefs with callbacks.
|
||||
* The objects moved have gc_refs changed to GC_REACHABLE; the objects
|
||||
* remaining in unreachable are left at GC_TENTATIVELY_UNREACHABLE.
|
||||
/* Move the objects in unreachable with __del__ methods into `finalizers`.
|
||||
* Objects moved into `finalizers` have gc_refs set to GC_REACHABLE; the
|
||||
* objects remaining in unreachable are left at GC_TENTATIVELY_UNREACHABLE.
|
||||
*/
|
||||
static void
|
||||
move_troublemakers(PyGC_Head *unreachable,
|
||||
PyGC_Head *finalizers,
|
||||
PyGC_Head *wr_callbacks)
|
||||
move_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers)
|
||||
{
|
||||
PyGC_Head *gc = unreachable->gc.gc_next;
|
||||
PyGC_Head *gc;
|
||||
PyGC_Head *next;
|
||||
|
||||
while (gc != unreachable) {
|
||||
/* March over unreachable. Move objects with finalizers into
|
||||
* `finalizers`.
|
||||
*/
|
||||
for (gc = unreachable->gc.gc_next; gc != unreachable; gc = next) {
|
||||
PyObject *op = FROM_GC(gc);
|
||||
PyGC_Head *next = gc->gc.gc_next;
|
||||
|
||||
assert(IS_TENTATIVELY_UNREACHABLE(op));
|
||||
next = gc->gc.gc_next;
|
||||
|
||||
if (has_finalizer(op)) {
|
||||
gc_list_remove(gc);
|
||||
gc_list_append(gc, finalizers);
|
||||
gc->gc.gc_refs = GC_REACHABLE;
|
||||
}
|
||||
else if (PyWeakref_Check(op) &&
|
||||
((PyWeakReference *)op)->wr_callback) {
|
||||
gc_list_remove(gc);
|
||||
gc_list_append(gc, wr_callbacks);
|
||||
gc->gc.gc_refs = GC_REACHABLE;
|
||||
}
|
||||
gc = next;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -463,82 +455,144 @@ move_finalizer_reachable(PyGC_Head *finalizers)
|
|||
}
|
||||
}
|
||||
|
||||
/* Clear all trash weakrefs with callbacks. This clears weakrefs first,
|
||||
* which has the happy result of disabling the callbacks without executing
|
||||
* them. A nasty technical complication: a weakref callback can itself be
|
||||
* the target of a weakref, in which case decrefing the callback can cause
|
||||
* another callback to trigger. But we can't allow arbitrary Python code to
|
||||
* get executed at this point (the callback on the callback may try to muck
|
||||
* with other cyclic trash we're trying to collect, even resurrecting it
|
||||
* while we're in the middle of doing tp_clear() on the trash).
|
||||
*
|
||||
* The private _PyWeakref_ClearRef() function exists so that we can clear
|
||||
* the reference in a weakref without triggering a callback on the callback.
|
||||
*
|
||||
* We have to save the callback objects and decref them later. But we can't
|
||||
* allocate new memory to save them (if we can't get new memory, we're dead).
|
||||
* So we grab a new reference on the clear'ed weakref, which prevents the
|
||||
* rest of gc from reclaiming it. _PyWeakref_ClearRef() leaves the
|
||||
* weakref's wr_callback member intact.
|
||||
*
|
||||
* In the end, then, wr_callbacks consists of cleared weakrefs that are
|
||||
* immune from collection. Near the end of gc, after collecting all the
|
||||
* cyclic trash, we call release_weakrefs(). That releases our references
|
||||
* to the cleared weakrefs, which in turn may trigger callbacks on their
|
||||
* callbacks.
|
||||
*/
|
||||
static void
|
||||
clear_weakrefs(PyGC_Head *wr_callbacks)
|
||||
{
|
||||
PyGC_Head *gc = wr_callbacks->gc.gc_next;
|
||||
|
||||
for (; gc != wr_callbacks; gc = gc->gc.gc_next) {
|
||||
PyObject *op = FROM_GC(gc);
|
||||
PyWeakReference *wr;
|
||||
|
||||
assert(IS_REACHABLE(op));
|
||||
assert(PyWeakref_Check(op));
|
||||
wr = (PyWeakReference *)op;
|
||||
assert(wr->wr_callback != NULL);
|
||||
Py_INCREF(op);
|
||||
_PyWeakref_ClearRef(wr);
|
||||
}
|
||||
}
|
||||
|
||||
/* Called near the end of gc. This gives up the references we own to
|
||||
* cleared weakrefs, allowing them to get collected, and in turn decref'ing
|
||||
* their callbacks.
|
||||
*
|
||||
* If a callback object is itself the target of a weakref callback,
|
||||
* decref'ing the callback object may trigger that other callback. If
|
||||
* that other callback was part of the cyclic trash in this generation,
|
||||
* that won't happen, since we cleared *all* trash-weakref callbacks near
|
||||
* the start of gc. If that other callback was not part of the cyclic trash
|
||||
* in this generation, then it acted like an external root to this round
|
||||
* of gc, so all the objects reachable from that callback are still alive.
|
||||
*
|
||||
* Giving up the references to the weakref objects will probably make
|
||||
* them go away too. However, if a weakref is reachable from finalizers,
|
||||
* it won't go away. We move it to the old generation then. Since a
|
||||
* weakref object doesn't have a finalizer, that's the right thing to do (it
|
||||
* doesn't belong in gc.garbage).
|
||||
*
|
||||
* We return the number of weakref objects freed (those not appended to old).
|
||||
/* Clear all weakrefs to unreachable objects, and if such a weakref has a
|
||||
* callback, invoke it if necessary. Note that it's possible for such
|
||||
* weakrefs to be outside the unreachable set -- indeed, those are precisely
|
||||
* the weakrefs whose callbacks must be invoked. See gc_weakref.txt for
|
||||
* overview & some details. Some weakrefs with callbacks may be reclaimed
|
||||
* directly by this routine; the number reclaimed is the return value. Other
|
||||
* weakrefs with callbacks may be moved into the `old` generation. Objects
|
||||
* moved into `old` have gc_refs set to GC_REACHABLE; the objects remaining in
|
||||
* unreachable are left at GC_TENTATIVELY_UNREACHABLE. When this returns,
|
||||
* no object in `unreachable` is weakly referenced anymore.
|
||||
*/
|
||||
static int
|
||||
release_weakrefs(PyGC_Head *wr_callbacks, PyGC_Head *old)
|
||||
handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old)
|
||||
{
|
||||
PyGC_Head *gc;
|
||||
PyObject *op; /* generally FROM_GC(gc) */
|
||||
PyWeakReference *wr; /* generally a cast of op */
|
||||
|
||||
PyGC_Head wrcb_to_call; /* weakrefs with callbacks to call */
|
||||
PyGC_Head wrcb_to_kill; /* weakrefs with callbacks to ignore */
|
||||
|
||||
PyGC_Head *next;
|
||||
int num_freed = 0;
|
||||
|
||||
while (! gc_list_is_empty(wr_callbacks)) {
|
||||
PyGC_Head *gc = wr_callbacks->gc.gc_next;
|
||||
PyObject *op = FROM_GC(gc);
|
||||
gc_list_init(&wrcb_to_call);
|
||||
gc_list_init(&wrcb_to_kill);
|
||||
|
||||
/* Clear all weakrefs to the objects in unreachable. If such a weakref
|
||||
* also has a callback, move it into `wrcb_to_call` if the callback
|
||||
* needs to be invoked, or into `wrcb_to_kill` if the callback should
|
||||
* be ignored. Note that we cannot invoke any callbacks until all
|
||||
* weakrefs to unreachable objects are cleared, lest the callback
|
||||
* resurrect an unreachable object via a still-active weakref. That's
|
||||
* why the weakrefs with callbacks are moved into different lists -- we
|
||||
* make another pass over those lists after this pass completes.
|
||||
*/
|
||||
for (gc = unreachable->gc.gc_next; gc != unreachable; gc = next) {
|
||||
PyWeakReference **wrlist;
|
||||
|
||||
op = FROM_GC(gc);
|
||||
assert(IS_TENTATIVELY_UNREACHABLE(op));
|
||||
next = gc->gc.gc_next;
|
||||
|
||||
if (! PyType_SUPPORTS_WEAKREFS(op->ob_type))
|
||||
continue;
|
||||
|
||||
/* It supports weakrefs. Does it have any? */
|
||||
wrlist = (PyWeakReference **)
|
||||
PyObject_GET_WEAKREFS_LISTPTR(op);
|
||||
|
||||
/* `op` may have some weakrefs. March over the list, clear
|
||||
* all the weakrefs, and move the weakrefs with callbacks
|
||||
* into one of the wrcb_to_{call,kill} lists.
|
||||
*/
|
||||
for (wr = *wrlist; wr != NULL; wr = *wrlist) {
|
||||
PyGC_Head *wrasgc; /* AS_GC(wr) */
|
||||
|
||||
/* _PyWeakref_ClearRef clears the weakref but leaves
|
||||
* the callback pointer intact. Obscure: it also
|
||||
* changes *wrlist.
|
||||
*/
|
||||
assert(wr->wr_object == op);
|
||||
_PyWeakref_ClearRef(wr);
|
||||
assert(wr->wr_object == Py_None);
|
||||
if (wr->wr_callback == NULL)
|
||||
continue; /* no callback */
|
||||
|
||||
/* Headache time. `op` is going away, and is weakly referenced by
|
||||
* `wr`, which has a callback. Should the callback be invoked? If wr
|
||||
* is also trash, no:
|
||||
*
|
||||
* 1. There's no need to call it. The object and the weakref are
|
||||
* both going away, so it's legitimate to pretend the weakref is
|
||||
* going away first. The user has to ensure a weakref outlives its
|
||||
* referent if they want a guarantee that the wr callback will get
|
||||
* invoked.
|
||||
*
|
||||
* 2. It may be catastrophic to call it. If the callback is also in
|
||||
* cyclic trash (CT), then although the CT is unreachable from
|
||||
* outside the current generation, CT may be reachable from the
|
||||
* callback. Then the callback could resurrect insane objects.
|
||||
*
|
||||
* Since the callback is never needed and may be unsafe in this case,
|
||||
* wr is moved to wrcb_to_kill.
|
||||
*
|
||||
* OTOH, if wr isn't part of CT, we should invoke the callback: the
|
||||
* weakref outlived the trash. Note that since wr isn't CT in this
|
||||
* case, its callback can't be CT either -- wr acted as an external
|
||||
* root to this generation, and therefore its callback did too. So
|
||||
* nothing in CT is reachable from the callback either, so it's hard
|
||||
* to imagine how calling it later could create a problem for us. wr
|
||||
* is moved to wrcb_to_call in this case.
|
||||
*
|
||||
* Obscure: why do we move weakrefs with ignored callbacks to a list
|
||||
* we crawl over later, instead of getting rid of the callback right
|
||||
* now? It's because the obvious way doesn't work: setting
|
||||
* wr->wr_callback to NULL requires that we decref the current
|
||||
* wr->wr_callback. But callbacks are also weakly referenceable, so
|
||||
* wr->wr_callback may *itself* be referenced by another weakref with
|
||||
* another callback. The latter callback could get triggered now if
|
||||
* we decref'ed now, but as explained before it's potentially unsafe to
|
||||
* invoke any callback before all weakrefs to CT are cleared.
|
||||
*/
|
||||
/* Create a new reference so that wr can't go away
|
||||
* before we can process it again.
|
||||
*/
|
||||
Py_INCREF(wr);
|
||||
|
||||
/* Move wr to the appropriate list. */
|
||||
wrasgc = AS_GC(wr);
|
||||
if (wrasgc == next)
|
||||
next = wrasgc->gc.gc_next;
|
||||
gc_list_remove(wrasgc);
|
||||
gc_list_append(wrasgc,
|
||||
IS_TENTATIVELY_UNREACHABLE(wr) ?
|
||||
&wrcb_to_kill : &wrcb_to_call);
|
||||
wrasgc->gc.gc_refs = GC_REACHABLE;
|
||||
}
|
||||
}
|
||||
|
||||
/* Now that all weakrefs to trash have been cleared, it's safe to
|
||||
* decref the callbacks we decided to ignore. We cannot invoke
|
||||
* them because they may be able to resurrect unreachable (from
|
||||
* outside) objects.
|
||||
*/
|
||||
while (! gc_list_is_empty(&wrcb_to_kill)) {
|
||||
gc = wrcb_to_kill.gc.gc_next;
|
||||
op = FROM_GC(gc);
|
||||
assert(IS_REACHABLE(op));
|
||||
assert(PyWeakref_Check(op));
|
||||
assert(((PyWeakReference *)op)->wr_callback != NULL);
|
||||
|
||||
/* Give up the reference we created in the first pass. When
|
||||
* op's refcount hits 0 (which it may or may not do right now),
|
||||
* op's tp_dealloc will decref op->wr_callback too.
|
||||
*/
|
||||
Py_DECREF(op);
|
||||
if (wr_callbacks->gc.gc_next == gc) {
|
||||
if (wrcb_to_kill.gc.gc_next == gc) {
|
||||
/* object is still alive -- move it */
|
||||
gc_list_remove(gc);
|
||||
gc_list_append(gc, old);
|
||||
|
@ -546,6 +600,43 @@ release_weakrefs(PyGC_Head *wr_callbacks, PyGC_Head *old)
|
|||
else
|
||||
++num_freed;
|
||||
}
|
||||
|
||||
/* Finally, invoke the callbacks we decided to honor. It's safe to
|
||||
* invoke them because they cannot reference objects in `unreachable`.
|
||||
*/
|
||||
while (! gc_list_is_empty(&wrcb_to_call)) {
|
||||
PyObject *temp;
|
||||
PyObject *callback;
|
||||
|
||||
gc = wrcb_to_call.gc.gc_next;
|
||||
op = FROM_GC(gc);
|
||||
assert(IS_REACHABLE(op));
|
||||
assert(PyWeakref_Check(op));
|
||||
wr = (PyWeakReference *)op;
|
||||
callback = wr->wr_callback;
|
||||
assert(callback != NULL);
|
||||
|
||||
/* copy-paste of weakrefobject.c's handle_callback() */
|
||||
temp = PyObject_CallFunction(callback, "O", wr);
|
||||
if (temp == NULL)
|
||||
PyErr_WriteUnraisable(callback);
|
||||
else
|
||||
Py_DECREF(temp);
|
||||
|
||||
/* Give up the reference we created in the first pass. When
|
||||
* op's refcount hits 0 (which it may or may not do right now),
|
||||
* op's tp_dealloc will decref op->wr_callback too.
|
||||
*/
|
||||
Py_DECREF(op);
|
||||
if (wrcb_to_call.gc.gc_next == gc) {
|
||||
/* object is still alive -- move it */
|
||||
gc_list_remove(gc);
|
||||
gc_list_append(gc, old);
|
||||
}
|
||||
else
|
||||
++num_freed;
|
||||
}
|
||||
|
||||
return num_freed;
|
||||
}
|
||||
|
||||
|
@ -652,7 +743,6 @@ collect(int generation)
|
|||
PyGC_Head *old; /* next older generation */
|
||||
PyGC_Head unreachable; /* non-problematic unreachable trash */
|
||||
PyGC_Head finalizers; /* objects with, & reachable from, __del__ */
|
||||
PyGC_Head wr_callbacks; /* weakrefs with callbacks */
|
||||
PyGC_Head *gc;
|
||||
|
||||
if (delstr == NULL) {
|
||||
|
@ -690,9 +780,9 @@ collect(int generation)
|
|||
old = young;
|
||||
|
||||
/* Using ob_refcnt and gc_refs, calculate which objects in the
|
||||
* container set are reachable from outside the set (ie. have a
|
||||
* container set are reachable from outside the set (i.e., have a
|
||||
* refcount greater than 0 when all the references within the
|
||||
* set are taken into account
|
||||
* set are taken into account).
|
||||
*/
|
||||
update_refs(young);
|
||||
subtract_refs(young);
|
||||
|
@ -714,23 +804,11 @@ collect(int generation)
|
|||
* finalizers can't safely be deleted. Python programmers should take
|
||||
* care not to create such things. For Python, finalizers means
|
||||
* instance objects with __del__ methods. Weakrefs with callbacks
|
||||
* can call arbitrary Python code, so those are special-cased too.
|
||||
*
|
||||
* Move unreachable objects with finalizers, and weakrefs with
|
||||
* callbacks, into different lists.
|
||||
* can also call arbitrary Python code but they will be dealt with by
|
||||
* handle_weakrefs().
|
||||
*/
|
||||
gc_list_init(&finalizers);
|
||||
gc_list_init(&wr_callbacks);
|
||||
move_troublemakers(&unreachable, &finalizers, &wr_callbacks);
|
||||
/* Clear the trash weakrefs with callbacks. This prevents their
|
||||
* callbacks from getting invoked (when a weakref goes away, so does
|
||||
* its callback).
|
||||
* We do this even if the weakrefs are reachable from finalizers.
|
||||
* If we didn't, breaking cycles in unreachable later could trigger
|
||||
* deallocation of objects in finalizers, which could in turn
|
||||
* cause callbacks to trigger. This may not be ideal behavior.
|
||||
*/
|
||||
clear_weakrefs(&wr_callbacks);
|
||||
move_finalizers(&unreachable, &finalizers);
|
||||
/* finalizers contains the unreachable objects with a finalizer;
|
||||
* unreachable objects reachable *from* those are also uncollectable,
|
||||
* and we move those into the finalizers list too.
|
||||
|
@ -747,17 +825,16 @@ collect(int generation)
|
|||
debug_cycle("collectable", FROM_GC(gc));
|
||||
}
|
||||
}
|
||||
|
||||
/* Clear weakrefs and invoke callbacks as necessary. */
|
||||
m += handle_weakrefs(&unreachable, old);
|
||||
|
||||
/* Call tp_clear on objects in the unreachable set. This will cause
|
||||
* the reference cycles to be broken. It may also cause some objects
|
||||
* in finalizers to be freed.
|
||||
*/
|
||||
delete_garbage(&unreachable, old);
|
||||
|
||||
/* Now that we're done analyzing stuff and breaking cycles, let
|
||||
* delayed weakref callbacks run.
|
||||
*/
|
||||
m += release_weakrefs(&wr_callbacks, old);
|
||||
|
||||
/* Collect statistics on uncollectable objects found and print
|
||||
* debugging information. */
|
||||
for (gc = finalizers.gc.gc_next;
|
||||
|
|
|
@ -850,7 +850,9 @@ PyWeakref_GetObject(PyObject *ref)
|
|||
return PyWeakref_GET_OBJECT(ref);
|
||||
}
|
||||
|
||||
|
||||
/* Note that there's an inlined copy-paste of handle_callback() in gcmodule.c's
|
||||
* handle_weakrefs().
|
||||
*/
|
||||
static void
|
||||
handle_callback(PyWeakReference *ref, PyObject *callback)
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue