SF bug #574132: Major GC related performance regression
"The regression" is actually due to that 2.2.1 had a bug that prevented the regression (which isn't a regression at all) from showing up. "The regression" is actually a glitch in cyclic gc that's been there forever. As the generation being collected is analyzed, objects that can't be collected (because, e.g., we find they're externally referenced, or are in an unreachable cycle but have a __del__ method) are moved out of the list of candidates. A tricksy scheme uses negative values of gc_refs to mark such objects as being moved. However, the exact negative value set at the start may become "more negative" over time for objects not in the generation being collected, and the scheme was checking for an exact match on the negative value originally assigned. As a result, objects in generations older than the one being collected could get scanned too, and yanked back into a younger generation. Doing so doesn't lead to an error, but doesn't do any good, and can burn an unbounded amount of time doing useless work. A test case is simple (thanks to Kevin Jacobs for finding it!): x = [] for i in xrange(200000): x.append((1,)) Without the patch, this ends up scanning all of x on every gen0 collection, scans all of x twice on every gen1 collection, and x gets yanked back into gen1 on every gen0 collection. With the patch, once x gets to gen2, it's never scanned again until another gen2 collection, and stays in gen2. Bugfix candidate, although the code has changed enough that I think I'll need to port it by hand. 2.2.1 also has a different bug that causes bound method objects not to get tracked at all (so the test case doesn't burn absurd amounts of time in 2.2.1, but *should* <wink>).
This commit is contained in:
parent
6238d2b024
commit
8839617cc9
|
@ -72,11 +72,19 @@ static int collecting;
|
|||
DEBUG_SAVEALL
|
||||
static int debug;
|
||||
|
||||
/* Special gc_refs value */
|
||||
/* When a collection begins, gc_refs is set to ob_refcnt for, and only for,
|
||||
* the objects in the generation being collected, called the "young"
|
||||
* generation at that point. As collection proceeds, when it's determined
|
||||
* that one of these can't be collected (e.g., because it's reachable from
|
||||
* outside, or has a __del__ method), the object is moved out of young, and
|
||||
* gc_refs is set to a negative value. The latter is so we can distinguish
|
||||
* collection candidates from non-candidates just by looking at the object.
|
||||
*/
|
||||
/* Special gc_refs value, although any negative value means "moved". */
|
||||
#define GC_MOVED -123
|
||||
|
||||
/* True if an object has been moved to the older generation */
|
||||
#define IS_MOVED(o) ((AS_GC(o))->gc.gc_refs == GC_MOVED)
|
||||
/* True iff an object is still a candidate for collection. */
|
||||
#define STILL_A_CANDIDATE(o) ((AS_GC(o))->gc.gc_refs >= 0)
|
||||
|
||||
/* list of uncollectable objects */
|
||||
static PyObject *garbage;
|
||||
|
@ -161,7 +169,10 @@ gc_list_size(PyGC_Head *list)
|
|||
|
||||
|
||||
|
||||
/* Set all gc_refs = ob_refcnt */
|
||||
/* Set all gc_refs = ob_refcnt. After this, STILL_A_CANDIDATE(o) is true
|
||||
* for all objects in containers, and false for all tracked gc objects not
|
||||
* in containers (although see the comment in visit_decref).
|
||||
*/
|
||||
static void
|
||||
update_refs(PyGC_Head *containers)
|
||||
{
|
||||
|
@ -174,10 +185,22 @@ update_refs(PyGC_Head *containers)
|
|||
static int
|
||||
visit_decref(PyObject *op, void *data)
|
||||
{
|
||||
/* There's no point to decrementing gc_refs unless
|
||||
* STILL_A_CANDIDATE(op) is true. It would take extra cycles to
|
||||
* check that, though. If STILL_A_CANDIDATE(op) is false,
|
||||
* decrementing gc_refs almost always makes it "even more negative",
|
||||
* so doesn't change that STILL_A_CANDIDATE is false, and no harm is
|
||||
* done. However, it's possible that, after many collections, this
|
||||
* could underflow gc_refs in a long-lived old object. In that case,
|
||||
* visit_move() may move the old object back to the generation
|
||||
* getting collected. That would be a waste of time, but wouldn't
|
||||
* cause an error.
|
||||
*/
|
||||
if (op && PyObject_IS_GC(op)) {
|
||||
if (IS_TRACKED(op))
|
||||
if (IS_TRACKED(op)) {
|
||||
AS_GC(op)->gc.gc_refs--;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -195,7 +218,7 @@ subtract_refs(PyGC_Head *containers)
|
|||
}
|
||||
}
|
||||
|
||||
/* Append objects with gc_refs > 0 to roots list */
|
||||
/* Move objects with gc_refs > 0 to roots list. They can't be collected. */
|
||||
static void
|
||||
move_roots(PyGC_Head *containers, PyGC_Head *roots)
|
||||
{
|
||||
|
@ -216,7 +239,7 @@ static int
|
|||
visit_move(PyObject *op, PyGC_Head *tolist)
|
||||
{
|
||||
if (PyObject_IS_GC(op)) {
|
||||
if (IS_TRACKED(op) && !IS_MOVED(op)) {
|
||||
if (IS_TRACKED(op) && STILL_A_CANDIDATE(op)) {
|
||||
PyGC_Head *gc = AS_GC(op);
|
||||
gc_list_remove(gc);
|
||||
gc_list_append(gc, tolist);
|
||||
|
@ -226,7 +249,9 @@ visit_move(PyObject *op, PyGC_Head *tolist)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* Move objects referenced from reachable to reachable set. */
|
||||
/* Move candidates referenced from reachable to reachable set (they're no
|
||||
* longer candidates).
|
||||
*/
|
||||
static void
|
||||
move_root_reachable(PyGC_Head *reachable)
|
||||
{
|
||||
|
@ -242,7 +267,7 @@ move_root_reachable(PyGC_Head *reachable)
|
|||
}
|
||||
}
|
||||
|
||||
/* return true of object has a finalization method */
|
||||
/* return true if object has a finalization method */
|
||||
static int
|
||||
has_finalizer(PyObject *op)
|
||||
{
|
||||
|
@ -269,6 +294,7 @@ move_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers)
|
|||
if (has_finalizer(op)) {
|
||||
gc_list_remove(gc);
|
||||
gc_list_append(gc, finalizers);
|
||||
gc->gc.gc_refs = GC_MOVED;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -333,6 +359,7 @@ handle_finalizers(PyGC_Head *finalizers, PyGC_Head *old)
|
|||
PyList_Append(garbage, op);
|
||||
}
|
||||
/* object is now reachable again */
|
||||
assert(!STILL_A_CANDIDATE(op));
|
||||
gc_list_remove(gc);
|
||||
gc_list_append(gc, old);
|
||||
}
|
||||
|
@ -349,6 +376,8 @@ delete_garbage(PyGC_Head *unreachable, PyGC_Head *old)
|
|||
while (!gc_list_is_empty(unreachable)) {
|
||||
PyGC_Head *gc = unreachable->gc.gc_next;
|
||||
PyObject *op = FROM_GC(gc);
|
||||
|
||||
assert(STILL_A_CANDIDATE(op));
|
||||
if (debug & DEBUG_SAVEALL) {
|
||||
PyList_Append(garbage, op);
|
||||
}
|
||||
|
@ -363,6 +392,7 @@ delete_garbage(PyGC_Head *unreachable, PyGC_Head *old)
|
|||
/* object is still alive, move it, it may die later */
|
||||
gc_list_remove(gc);
|
||||
gc_list_append(gc, old);
|
||||
gc->gc.gc_refs = GC_MOVED;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue