SF bug 839548: Bug in type's GC handling causes segfaults.

Also SF patch 843455.

This is a critical bugfix.
I'll backport to 2.3 maint, but not beyond that.  The bugs this fixes
have been there since weakrefs were introduced.
This commit is contained in:
Tim Peters 2003-11-20 21:21:46 +00:00
parent 901dc98316
commit 403a203223
6 changed files with 491 additions and 20 deletions

View File

@ -39,6 +39,8 @@ PyAPI_FUNC(PyObject *) PyWeakref_GetObject(PyObject *ref);
PyAPI_FUNC(long) _PyWeakref_GetWeakrefCount(PyWeakReference *head);
PyAPI_FUNC(void) _PyWeakref_ClearRef(PyWeakReference *self);
#define PyWeakref_GET_OBJECT(ref) (((PyWeakReference *)(ref))->wr_object)

View File

@ -337,6 +337,211 @@ class ReferencesTestCase(TestBase):
# deallocation of c2.
del c2
def test_callback_in_cycle_1(self):
import gc
class J(object):
pass
class II(object):
def acallback(self, ignore):
self.J
I = II()
I.J = J
I.wr = weakref.ref(J, I.acallback)
# Now J and II are each in a self-cycle (as all new-style class
# objects are, since their __mro__ points back to them). I holds
# both a weak reference (I.wr) and a strong reference (I.J) to class
# J. I is also in a cycle (I.wr points to a weakref that references
# I.acallback). When we del these three, they all become trash, but
# the cycles prevent any of them from getting cleaned up immediately.
# Instead they have to wait for cyclic gc to deduce that they're
# trash.
#
# gc used to call tp_clear on all of them, and the order in which
# it does that is pretty accidental. The exact order in which we
# built up these things manages to provoke gc into running tp_clear
# in just the right order (I last). Calling tp_clear on II leaves
# behind an insane class object (its __mro__ becomes NULL). Calling
# tp_clear on J breaks its self-cycle, but J doesn't get deleted
# just then because of the strong reference from I.J. Calling
# tp_clear on I starts to clear I's __dict__, and just happens to
# clear I.J first -- I.wr is still intact. That removes the last
# reference to J, which triggers the weakref callback. The callback
# tries to do "self.J", and instances of new-style classes look up
# attributes ("J") in the class dict first. The class (II) wants to
# search II.__mro__, but that's NULL. The result was a segfault in
# a release build, and an assert failure in a debug build.
del I, J, II
gc.collect()
def test_callback_in_cycle_2(self):
import gc
# This is just like test_callback_in_cycle_1, except that II is an
# old-style class. The symptom is different then: an instance of an
# old-style class looks in its own __dict__ first. 'J' happens to
# get cleared from I.__dict__ before 'wr', and 'J' was never in II's
# __dict__, so the attribute isn't found. The difference is that
# the old-style II doesn't have a NULL __mro__ (it doesn't have any
# __mro__), so no segfault occurs. Instead it got:
# test_callback_in_cycle_2 (__main__.ReferencesTestCase) ...
# Exception exceptions.AttributeError:
# "II instance has no attribute 'J'" in <bound method II.acallback
# of <?.II instance at 0x00B9B4B8>> ignored
class J(object):
pass
class II:
def acallback(self, ignore):
self.J
I = II()
I.J = J
I.wr = weakref.ref(J, I.acallback)
del I, J, II
gc.collect()
def test_callback_in_cycle_3(self):
import gc
# This one broke the first patch that fixed the last two. In this
# case, the objects reachable from the callback aren't also reachable
# from the object (c1) *triggering* the callback: you can get to
# c1 from c2, but not vice-versa. The result was that c2's __dict__
# got tp_clear'ed by the time the c2.cb callback got invoked.
class C:
def cb(self, ignore):
self.me
self.c1
self.wr
c1, c2 = C(), C()
c2.me = c2
c2.c1 = c1
c2.wr = weakref.ref(c1, c2.cb)
del c1, c2
gc.collect()
def test_callback_in_cycle_4(self):
import gc
# Like test_callback_in_cycle_3, except c2 and c1 have different
# classes. c2's class (C) isn't reachable from c1 then, so protecting
# objects reachable from the dying object (c1) isn't enough to stop
# c2's class (C) from getting tp_clear'ed before c2.cb is invoked.
# The result was a segfault (C.__mro__ was NULL when the callback
# tried to look up self.me).
class C(object):
def cb(self, ignore):
self.me
self.c1
self.wr
class D:
pass
c1, c2 = D(), C()
c2.me = c2
c2.c1 = c1
c2.wr = weakref.ref(c1, c2.cb)
del c1, c2, C, D
gc.collect()
def test_callback_in_cycle_resurrection(self):
import gc
# Do something nasty in a weakref callback: resurrect objects
# from dead cycles. For this to be attempted, the weakref and
# its callback must also be part of the cyclic trash (else the
# objects reachable via the callback couldn't be in cyclic trash
# to begin with -- the callback would act like an external root).
# But gc clears trash weakrefs with callbacks early now, which
# disables the callbacks, so the callbacks shouldn't get called
# at all (and so nothing actually gets resurrected).
alist = []
class C(object):
def __init__(self, value):
self.attribute = value
def acallback(self, ignore):
alist.append(self.c)
c1, c2 = C(1), C(2)
c1.c = c2
c2.c = c1
c1.wr = weakref.ref(c2, c1.acallback)
c2.wr = weakref.ref(c1, c2.acallback)
def C_went_away(ignore):
alist.append("C went away")
wr = weakref.ref(C, C_went_away)
del c1, c2, C # make them all trash
self.assertEqual(alist, []) # del isn't enough to reclaim anything
gc.collect()
# c1.wr and c2.wr were part of the cyclic trash, so should have
# been cleared without their callbacks executing. OTOH, the weakref
# to C is bound to a function local (wr), and wasn't trash, so that
# callback should have been invoked when C went away.
self.assertEqual(alist, ["C went away"])
# The remaining weakref should be dead now (its callback ran).
self.assertEqual(wr(), None)
del alist[:]
gc.collect()
self.assertEqual(alist, [])
def test_callbacks_on_callback(self):
import gc
# Set up weakref callbacks *on* weakref callbacks.
alist = []
def safe_callback(ignore):
alist.append("safe_callback called")
class C(object):
def cb(self, ignore):
alist.append("cb called")
c, d = C(), C()
c.other = d
d.other = c
callback = c.cb
c.wr = weakref.ref(d, callback) # this won't trigger
d.wr = weakref.ref(callback, d.cb) # ditto
external_wr = weakref.ref(callback, safe_callback) # but this will
self.assert_(external_wr() is callback)
# The weakrefs attached to c and d should get cleared, so that
# C.cb is never called. But external_wr isn't part of the cyclic
# trash, and no cyclic trash is reachable from it, so safe_callback
# should get invoked when the bound method object callback (c.cb)
# -- which is itself a callback, and also part of the cyclic trash --
# gets reclaimed at the end of gc.
del callback, c, d, C
self.assertEqual(alist, []) # del isn't enough to clean up cycles
gc.collect()
self.assertEqual(alist, ["safe_callback called"])
self.assertEqual(external_wr(), None)
del alist[:]
gc.collect()
self.assertEqual(alist, [])
class Object:
def __init__(self, arg):
self.arg = arg

View File

@ -12,9 +12,20 @@ What's New in Python 2.4 alpha 1?
Core and builtins
-----------------
- Compiler flags set in PYTHONSTARTUP are now active in __main__.
- Added two builtin types, set() and frozenset().
- Critical bugfix, for SF bug 839548: if a weakref with a callback,
its callback, and its weakly referenced object, all became part of
cyclic garbage during a single run of garbage collection, the order
in which they were torn down was unpredictable. It was possible for
the callback to see partially-torn-down objects, leading to immediate
segfaults, or, if the callback resurrected garbage objects, to
resurrect insane objects that caused segfaults (or other surprises)
later. In one sense this wasn't surprising, because Python's cyclic gc
had no knowledge of Python's weakref objects. It does now. When
weakrefs with callbacks become part of cyclic garbage now, those
weakrefs are cleared first. The callbacks don't trigger then,
preventing the problems. If you need callbacks to trigger, then just
as when cyclic gc is not involved, you need to write your code so
that weakref objects outlive the objects they weakly reference.
- Critical bugfix, for SF bug 840829: if cyclic garbage collection
happened to occur during a weakref callback for a new-style class
@ -22,6 +33,10 @@ Core and builtins
in a debug build, a segfault occurred reliably very soon after).
This has been repaired.
- Compiler flags set in PYTHONSTARTUP are now active in __main__.
- Added two builtin types, set() and frozenset().
- Added a reversed() builtin function that returns a reverse iterator
over a sequence.

107
Modules/gc_weakref.txt Normal file
View File

@ -0,0 +1,107 @@
Before 2.3.3, Python's cyclic gc didn't pay any attention to weakrefs.
Segfaults in Zope3 resulted.
weakrefs in Python are designed to, at worst, let *other* objects learn
that a given object has died, via a callback function. The weakly
referenced object itself is not passed to the callback, and the presumption
is that the weakly referenced object is unreachable trash at the time the
callback is invoked.
That's usually true, but not always. Suppose a weakly referenced object
becomes part of a clump of cyclic trash. When enough cycles are broken by
cyclic gc that the object is reclaimed, the callback is invoked. If it's
possible for the callback to get at objects in the cycle(s), then it may be
possible for those objects to access (via strong references in the cycle)
the weakly referenced object being torn down, or other objects in the cycle
that have already suffered a tp_clear() call. There's no guarantee that an
object is in a sane state after tp_clear(). Bad things (including
segfaults) can happen right then, during the callback's execution, or can
happen at any later time if the callback manages to resurrect an insane
object.
Note that if it's possible for the callback to get at objects in the trash
cycles, it must also be the case that the callback itself is part of the
trash cycles. Else the callback would have acted as an external root to
the current collection, and nothing reachable from it would be in cyclic
trash either.
More, if the callback itself is in cyclic trash, then the weakref to which
the callback is attached must also be trash, and for the same kind of
reason: if the weakref acted as an external root, then the callback could
not have been cyclic trash.
So a problem here requires that a weakref, that weakref's callback, and the
weakly referenced object, all be in cyclic trash at the same time. This
isn't easy to stumble into by accident while Python is running, and, indeed,
it took quite a while to dream up failing test cases. Zope3 saw segfaults
during shutdown, during the second call of gc in Py_Finalize, after most
modules had been torn down. That creates many trash cycles (esp. those
involving new-style classes), making the problem much more likely. Once you
know what's required to provoke the problem, though, it's easy to create
tests that segfault before shutdown.
In 2.3.3, before breaking cycles, we first clear all the weakrefs with
callbacks in cyclic trash. Since the weakrefs *are* trash, and there's no
defined-- or even predictable --order in which tp_clear() gets called on
cyclic trash, it's defensible to first clear weakrefs with callbacks. It's
a feature of Python's weakrefs too that when a weakref goes away, the
callback (if any) associated with it is thrown away too, unexecuted.
Just that much is almost enough to prevent problems, by throwing away
*almost* all the weakref callbacks that could get triggered by gc. The
problem remaining is that clearing a weakref with a callback decrefs the
callback object, and the callback object may *itself* be weakly referenced,
via another weakref with another callback. So the process of clearing
weakrefs can trigger callbacks attached to other weakrefs, and those
latter weakrefs may or may not be part of cyclic trash.
So, to prevent any Python code from running while gc is invoking tp_clear()
on all the objects in cyclic trash, it's not quite enough just to invoke
tp_clear() on weakrefs with callbacks first. Instead the weakref module
grew a new private function (_PyWeakref_ClearRef) that does only part of
tp_clear(): it removes the weakref from the weakly-referenced object's list
of weakrefs, but does not decref the callback object. So calling
_PyWeakref_ClearRef(wr) ensures that wr's callback object will never
trigger, and (unlike weakref's tp_clear()) also prevents any callback
associated *with* wr's callback object from triggering.
Then we can call tp_clear on all the cyclic objects and never trigger
Python code.
After we do that, the callback objects still need to be decref'ed. Callbacks
(if any) *on* the callback objects that were also part of cyclic trash won't
get invoked, because we cleared all trash weakrefs with callbacks at the
start. Callbacks on the callback objects that were not part of cyclic trash
acted as external roots to everything reachable from them, so nothing
reachable from them was part of cyclic trash, so gc didn't do any damage to
objects reachable from them, and it's safe to call them at the end of gc.
An alternative would have been to treat objects with callbacks like objects
with __del__ methods, refusing to collect them, appending them to gc.garbage
instead. That would have been much easier. Jim Fulton gave a strong
argument against that (on Python-Dev):
There's a big difference between __del__ and weakref callbacks.
The __del__ method is "internal" to a design. When you design a
class with a del method, you know you have to avoid including the
class in cycles.
Now, suppose you have a design that makes has no __del__ methods but
that does use cyclic data structures. You reason about the design,
run tests, and convince yourself you don't have a leak.
Now, suppose some external code creates a weakref to one of your
objects. All of a sudden, you start leaking. You can look at your
code all you want and you won't find a reason for the leak.
IOW, a class designer can out-think __del__ problems, but has no control
over who creates weakrefs to his classes or class instances. The class
user has little chance either of predicting when the weakrefs he creates
may end up in cycles.
Callbacks on weakref callbacks are executed in an arbitrary order, and
that's not good (a primary reason not to collect cycles with objects with
__del__ methods is to avoid running finalizers in an arbitrary order).
However, a weakref callback on a weakref callback has got to be rare.
It's possible to do such a thing, so gc has to be robust against it, but
I doubt anyone has done it outside the test case I wrote for it.

View File

@ -396,13 +396,17 @@ has_finalizer(PyObject *op)
return 0;
}
/* Move the objects in unreachable with __del__ methods into finalizers.
* The objects remaining in unreachable do not have __del__ methods, and
* gc_refs remains GC_TENTATIVELY_UNREACHABLE for them. The objects
* moved into finalizers have gc_refs changed to GC_REACHABLE.
/* Move the objects in unreachable with __del__ methods into finalizers,
* and weakrefs with callbacks into wr_callbacks.
* The objects remaining in unreachable do not have __del__ methods, and are
* not weakrefs with callbacks.
* The objects moved have gc_refs changed to GC_REACHABLE; the objects
* remaining in unreachable are left at GC_TENTATIVELY_UNREACHABLE.
*/
static void
move_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers)
move_troublemakers(PyGC_Head *unreachable,
PyGC_Head *finalizers,
PyGC_Head *wr_callbacks)
{
PyGC_Head *gc = unreachable->gc.gc_next;
@ -417,6 +421,12 @@ move_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers)
gc_list_append(gc, finalizers);
gc->gc.gc_refs = GC_REACHABLE;
}
else if (PyWeakref_Check(op) &&
((PyWeakReference *)op)->wr_callback) {
gc_list_remove(gc);
gc_list_append(gc, wr_callbacks);
gc->gc.gc_refs = GC_REACHABLE;
}
gc = next;
}
}
@ -453,6 +463,93 @@ move_finalizer_reachable(PyGC_Head *finalizers)
}
}
/* Clear all trash weakrefs with callbacks. This clears weakrefs first,
* which has the happy result of disabling the callbacks without executing
* them. A nasty technical complication: a weakref callback can itself be
* the target of a weakref, in which case decrefing the callback can cause
* another callback to trigger. But we can't allow arbitrary Python code to
* get executed at this point (the callback on the callback may try to muck
* with other cyclic trash we're trying to collect, even resurrecting it
* while we're in the middle of doing tp_clear() on the trash).
*
* The private _PyWeakref_ClearRef() function exists so that we can clear
* the reference in a weakref without triggering a callback on the callback.
*
* We have to save the callback objects and decref them later. But we can't
* allocate new memory to save them (if we can't get new memory, we're dead).
* So we grab a new reference on the clear'ed weakref, which prevents the
* rest of gc from reclaiming it. _PyWeakref_ClearRef() leaves the
* weakref's wr_callback member intact.
*
* In the end, then, wr_callbacks consists of cleared weakrefs that are
* immune from collection. Near the end of gc, after collecting all the
* cyclic trash, we call release_weakrefs(). That releases our references
* to the cleared weakrefs, which in turn may trigger callbacks on their
* callbacks.
*/
static void
clear_weakrefs(PyGC_Head *wr_callbacks)
{
PyGC_Head *gc = wr_callbacks->gc.gc_next;
for (; gc != wr_callbacks; gc = gc->gc.gc_next) {
PyObject *op = FROM_GC(gc);
PyWeakReference *wr;
assert(IS_REACHABLE(op));
assert(PyWeakref_Check(op));
wr = (PyWeakReference *)op;
assert(wr->wr_callback != NULL);
Py_INCREF(op);
_PyWeakref_ClearRef(wr);
}
}
/* Called near the end of gc. This gives up the references we own to
* cleared weakrefs, allowing them to get collected, and in turn decref'ing
* their callbacks.
*
* If a callback object is itself the target of a weakref callback,
* decref'ing the callback object may trigger that other callback. If
* that other callback was part of the cyclic trash in this generation,
* that won't happen, since we cleared *all* trash-weakref callbacks near
* the start of gc. If that other callback was not part of the cyclic trash
* in this generation, then it acted like an external root to this round
* of gc, so all the objects reachable from that callback are still alive.
*
* Giving up the references to the weakref objects will probably make
* them go away too. However, if a weakref is reachable from finalizers,
* it won't go away. We move it to the old generation then. Since a
* weakref object doesn't have a finalizer, that's the right thing to do (it
* doesn't belong in gc.garbage).
*
* We return the number of weakref objects freed (those not appended to old).
*/
static int
release_weakrefs(PyGC_Head *wr_callbacks, PyGC_Head *old)
{
int num_freed = 0;
while (! gc_list_is_empty(wr_callbacks)) {
PyGC_Head *gc = wr_callbacks->gc.gc_next;
PyObject *op = FROM_GC(gc);
PyWeakReference *wr = (PyWeakReference *)op;
assert(IS_REACHABLE(op));
assert(PyWeakref_Check(op));
assert(wr->wr_callback != NULL);
Py_DECREF(op);
if (wr_callbacks->gc.gc_next == gc) {
/* object is still alive -- move it */
gc_list_remove(gc);
gc_list_append(gc, old);
}
else
++num_freed;
}
return num_freed;
}
static void
debug_instance(char *msg, PyInstanceObject *inst)
{
@ -554,8 +651,9 @@ collect(int generation)
long n = 0; /* # unreachable objects that couldn't be collected */
PyGC_Head *young; /* the generation we are examining */
PyGC_Head *old; /* next older generation */
PyGC_Head unreachable;
PyGC_Head finalizers;
PyGC_Head unreachable; /* non-problematic unreachable trash */
PyGC_Head finalizers; /* objects with, & reachable from, __del__ */
PyGC_Head wr_callbacks; /* weakrefs with callbacks */
PyGC_Head *gc;
if (delstr == NULL) {
@ -616,20 +714,33 @@ collect(int generation)
/* All objects in unreachable are trash, but objects reachable from
* finalizers can't safely be deleted. Python programmers should take
* care not to create such things. For Python, finalizers means
* instance objects with __del__ methods.
* instance objects with __del__ methods. Weakrefs with callbacks
* can call arbitrary Python code, so those are special-cased too.
*
* Move unreachable objects with finalizers into a different list.
* Move unreachable objects with finalizers, and weakrefs with
* callbacks, into different lists.
*/
gc_list_init(&finalizers);
move_finalizers(&unreachable, &finalizers);
gc_list_init(&wr_callbacks);
move_troublemakers(&unreachable, &finalizers, &wr_callbacks);
/* Clear the trash weakrefs with callbacks. This prevents their
* callbacks from getting invoked (when a weakref goes away, so does
* its callback).
* We do this even if the weakrefs are reachable from finalizers.
* If we didn't, breaking cycles in unreachable later could trigger
* deallocation of objects in finalizers, which could in turn
* cause callbacks to trigger. This may not be ideal behavior.
*/
clear_weakrefs(&wr_callbacks);
/* finalizers contains the unreachable objects with a finalizer;
* unreachable objects reachable only *from* those are also
* uncollectable, and we move those into the finalizers list too.
* unreachable objects reachable *from* those are also uncollectable,
* and we move those into the finalizers list too.
*/
move_finalizer_reachable(&finalizers);
/* Collect statistics on collectable objects found and print
* debugging information. */
* debugging information.
*/
for (gc = unreachable.gc.gc_next; gc != &unreachable;
gc = gc->gc.gc_next) {
m++;
@ -643,6 +754,11 @@ collect(int generation)
*/
delete_garbage(&unreachable, old);
/* Now that we're done analyzing stuff and breaking cycles, let
* delayed weakref callbacks run.
*/
m += release_weakrefs(&wr_callbacks, old);
/* Collect statistics on uncollectable objects found and print
* debugging information. */
for (gc = finalizers.gc.gc_next;

View File

@ -53,17 +53,43 @@ clear_weakref(PyWeakReference *self)
if (*list == self)
*list = self->wr_next;
self->wr_object = Py_None;
self->wr_callback = NULL;
if (self->wr_prev != NULL)
self->wr_prev->wr_next = self->wr_next;
if (self->wr_next != NULL)
self->wr_next->wr_prev = self->wr_prev;
self->wr_prev = NULL;
self->wr_next = NULL;
Py_XDECREF(callback);
}
if (callback != NULL) {
Py_DECREF(callback);
self->wr_callback = NULL;
}
}
/* Cyclic gc uses this to *just* clear the passed-in reference, leaving
* the callback intact and uncalled. It must be possible to call self's
* tp_dealloc() after calling this, so self has to be left in a sane enough
* state for that to work. We expect tp_dealloc to decref the callback
* then. The reason for not letting clear_weakref() decref the callback
* right now is that if the callback goes away, that may in turn trigger
* another callback (if a weak reference to the callback exists) -- running
* arbitrary Python code in the middle of gc is a disaster. The convolution
* here allows gc to delay triggering such callbacks until the world is in
* a sane state again.
*/
void
_PyWeakref_ClearRef(PyWeakReference *self)
{
PyObject *callback;
assert(self != NULL);
assert(PyWeakref_Check(self));
/* Preserve and restore the callback around clear_weakref. */
callback = self->wr_callback;
self->wr_callback = NULL;
clear_weakref(self);
self->wr_callback = callback;
}
static void
weakref_dealloc(PyWeakReference *self)
@ -117,7 +143,7 @@ weakref_hash(PyWeakReference *self)
self->hash = PyObject_Hash(PyWeakref_GET_OBJECT(self));
return self->hash;
}
static PyObject *
weakref_repr(PyWeakReference *self)
@ -324,7 +350,7 @@ WRAP_BINARY(proxy_iand, PyNumber_InPlaceAnd)
WRAP_BINARY(proxy_ixor, PyNumber_InPlaceXor)
WRAP_BINARY(proxy_ior, PyNumber_InPlaceOr)
static int
static int
proxy_nonzero(PyWeakReference *proxy)
{
PyObject *o = PyWeakref_GET_OBJECT(proxy);