SF bug #574132: Major GC related performance regression

"The regression" is actually due to the fact that 2.2.1 had a bug that
prevented the regression (which isn't a regression at all) from showing
up.  "The regression" is actually a glitch in cyclic gc that's been there
forever.

As the generation being collected is analyzed, objects that can't be
collected (because, e.g., we find they're externally referenced, or
are in an unreachable cycle but have a __del__ method) are moved out
of the list of candidates.  A tricksy scheme uses negative values of
gc_refs to mark such objects as being moved.  However, the exact
negative value set at the start may become "more negative" over time
for objects not in the generation being collected, and the scheme was
checking for an exact match on the negative value originally assigned.
As a result, objects in generations older than the one being collected
could get scanned too, and yanked back into a younger generation.  Doing
so doesn't lead to an error, but doesn't do any good, and can burn an
unbounded amount of time doing useless work.

A test case is simple (thanks to Kevin Jacobs for finding it!):

x = []
for i in xrange(200000):
    x.append((1,))

Without the patch, all of x is scanned on every gen0 collection and scanned
twice on every gen1 collection, and x gets yanked back into gen1 on every
gen0 collection.  With the patch, once x gets to gen2, it's never scanned
again until another gen2 collection, and it stays in gen2.

Bugfix candidate, although the code has changed enough that I think I'll
need to port it by hand.  2.2.1 also has a different bug that causes
bound method objects not to get tracked at all (so the test case doesn't
burn absurd amounts of time in 2.2.1, but *should* <wink>).
This commit is contained in:
Tim Peters 2002-06-30 17:56:40 +00:00
parent 6238d2b024
commit 8839617cc9
1 changed file with 43 additions and 13 deletions

View File

@ -72,11 +72,19 @@ static int collecting;
DEBUG_SAVEALL
static int debug;
/* Special gc_refs value */
/* When a collection begins, gc_refs is set to ob_refcnt for, and only for,
* the objects in the generation being collected, called the "young"
* generation at that point. As collection proceeds, when it's determined
* that one of these can't be collected (e.g., because it's reachable from
* outside, or has a __del__ method), the object is moved out of young, and
* gc_refs is set to a negative value. The latter is so we can distinguish
* collection candidates from non-candidates just by looking at the object.
*/
/* Special gc_refs value, although any negative value means "moved". */
#define GC_MOVED -123
/* True if an object has been moved to the older generation */
#define IS_MOVED(o) ((AS_GC(o))->gc.gc_refs == GC_MOVED)
/* True iff an object is still a candidate for collection. */
#define STILL_A_CANDIDATE(o) ((AS_GC(o))->gc.gc_refs >= 0)
/* list of uncollectable objects */
static PyObject *garbage;
@ -161,7 +169,10 @@ gc_list_size(PyGC_Head *list)
/* Set all gc_refs = ob_refcnt */
/* Set all gc_refs = ob_refcnt. After this, STILL_A_CANDIDATE(o) is true
* for all objects in containers, and false for all tracked gc objects not
* in containers (although see the comment in visit_decref).
*/
static void
update_refs(PyGC_Head *containers)
{
@ -174,10 +185,22 @@ update_refs(PyGC_Head *containers)
static int
visit_decref(PyObject *op, void *data)
{
/* There's no point to decrementing gc_refs unless
* STILL_A_CANDIDATE(op) is true. It would take extra cycles to
* check that, though. If STILL_A_CANDIDATE(op) is false,
* decrementing gc_refs almost always makes it "even more negative",
* so doesn't change that STILL_A_CANDIDATE is false, and no harm is
* done. However, it's possible that, after many collections, this
* could underflow gc_refs in a long-lived old object. In that case,
* visit_move() may move the old object back to the generation
* getting collected. That would be a waste of time, but wouldn't
* cause an error.
*/
if (op && PyObject_IS_GC(op)) {
if (IS_TRACKED(op))
if (IS_TRACKED(op)) {
AS_GC(op)->gc.gc_refs--;
}
}
return 0;
}
@ -195,7 +218,7 @@ subtract_refs(PyGC_Head *containers)
}
}
/* Append objects with gc_refs > 0 to roots list */
/* Move objects with gc_refs > 0 to roots list. They can't be collected. */
static void
move_roots(PyGC_Head *containers, PyGC_Head *roots)
{
@ -216,7 +239,7 @@ static int
visit_move(PyObject *op, PyGC_Head *tolist)
{
if (PyObject_IS_GC(op)) {
if (IS_TRACKED(op) && !IS_MOVED(op)) {
if (IS_TRACKED(op) && STILL_A_CANDIDATE(op)) {
PyGC_Head *gc = AS_GC(op);
gc_list_remove(gc);
gc_list_append(gc, tolist);
@ -226,7 +249,9 @@ visit_move(PyObject *op, PyGC_Head *tolist)
return 0;
}
/* Move objects referenced from reachable to reachable set. */
/* Move candidates referenced from reachable to reachable set (they're no
* longer candidates).
*/
static void
move_root_reachable(PyGC_Head *reachable)
{
@ -242,7 +267,7 @@ move_root_reachable(PyGC_Head *reachable)
}
}
/* return true of object has a finalization method */
/* return true if object has a finalization method */
static int
has_finalizer(PyObject *op)
{
@ -269,6 +294,7 @@ move_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers)
if (has_finalizer(op)) {
gc_list_remove(gc);
gc_list_append(gc, finalizers);
gc->gc.gc_refs = GC_MOVED;
}
}
}
@ -333,6 +359,7 @@ handle_finalizers(PyGC_Head *finalizers, PyGC_Head *old)
PyList_Append(garbage, op);
}
/* object is now reachable again */
assert(!STILL_A_CANDIDATE(op));
gc_list_remove(gc);
gc_list_append(gc, old);
}
@ -349,6 +376,8 @@ delete_garbage(PyGC_Head *unreachable, PyGC_Head *old)
while (!gc_list_is_empty(unreachable)) {
PyGC_Head *gc = unreachable->gc.gc_next;
PyObject *op = FROM_GC(gc);
assert(STILL_A_CANDIDATE(op));
if (debug & DEBUG_SAVEALL) {
PyList_Append(garbage, op);
}
@ -363,6 +392,7 @@ delete_garbage(PyGC_Head *unreachable, PyGC_Head *old)
/* object is still alive, move it, it may die later */
gc_list_remove(gc);
gc_list_append(gc, old);
gc->gc.gc_refs = GC_MOVED;
}
}
}