Fix typos in gcmodule.c and restructure comments for clarity (GH-17983)
This commit is contained in:
parent
d23f78267a
commit
97f1267a54
|
@ -609,7 +609,7 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)
|
|||
// NEXT_MASK_UNREACHABLE flag, we set it unconditionally.
|
||||
// But this may pollute the unreachable list head's 'next' pointer
|
||||
// too. That's semantically senseless but expedient here - the
|
||||
// damage is repaired when this fumction ends.
|
||||
// damage is repaired when this function ends.
|
||||
last->_gc_next = (NEXT_MASK_UNREACHABLE | (uintptr_t)gc);
|
||||
_PyGCHead_SET_PREV(gc, last);
|
||||
gc->_gc_next = (NEXT_MASK_UNREACHABLE | (uintptr_t)unreachable);
|
||||
|
@ -1039,7 +1039,7 @@ clear_freelists(void)
|
|||
(void)PyContext_ClearFreeList();
|
||||
}
|
||||
|
||||
// Show stats for objects in each gennerations.
|
||||
// Show stats for objects in each generation.
|
||||
static void
|
||||
show_stats_each_generations(GCState *gcstate)
|
||||
{
|
||||
|
@ -1058,17 +1058,17 @@ show_stats_each_generations(GCState *gcstate)
|
|||
buf, gc_list_size(&gcstate->permanent_generation.head));
|
||||
}
|
||||
|
||||
/* Deduce wich objects among "base" are unreachable from outside the list
|
||||
/* Deduce which objects among "base" are unreachable from outside the list
|
||||
and move them to 'unreachable'. The process consists of the following steps:
|
||||
|
||||
1. Copy all reference counts to a different field (gc_prev is used to hold
|
||||
this copy to save memory).
|
||||
2. Traverse all objects in "base" and visit all referenced objects using
|
||||
"tp_traverse" and for every visited object, substract 1 to the reference
|
||||
"tp_traverse" and for every visited object, subtract 1 from the reference
|
||||
count (the one that we copied in the previous step). After this step, all
|
||||
objects that can be reached directly from outside must have strictly positive
|
||||
reference count, while all unreachable objects must have a count of exactly 0.
|
||||
3. Indentify all unreachable objects (the ones with 0 reference count) and move
|
||||
3. Identify all unreachable objects (the ones with 0 reference count) and move
|
||||
them to the "unreachable" list. This step also needs to move back to "base" all
|
||||
objects that were initially marked as unreachable but are referenced transitively
|
||||
by the reachable objects (the ones with strictly positive reference count).
|
||||
|
@ -1098,10 +1098,38 @@ deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) {
|
|||
|
||||
/* Leave everything reachable from outside base in base, and move
|
||||
* everything else (in base) to unreachable.
|
||||
*
|
||||
* NOTE: This used to move the reachable objects into a reachable
|
||||
* set instead. But most things usually turn out to be reachable,
|
||||
* so it's more efficient to move the unreachable things. See note
|
||||
^ [REACHABLE OR UNREACHABLE?] at the file end.
|
||||
* so it's more efficient to move the unreachable things. It "sounds slick"
|
||||
* to move the unreachable objects, until you think about it - the reason it
|
||||
* pays isn't actually obvious.
|
||||
*
|
||||
* Suppose we create objects A, B, C in that order. They appear in the young
|
||||
* generation in the same order. If B points to A, and C to B, and C is
|
||||
* reachable from outside, then the adjusted refcounts will be 0, 0, and 1
|
||||
* respectively.
|
||||
*
|
||||
* When move_unreachable finds A, A is moved to the unreachable list. The
|
||||
* same for B when it's first encountered. Then C is traversed, B is moved
|
||||
* _back_ to the reachable list. B is eventually traversed, and then A is
|
||||
* moved back to the reachable list.
|
||||
*
|
||||
* So instead of not moving at all, the reachable objects B and A are moved
|
||||
* twice each. Why is this a win? A straightforward algorithm to move the
|
||||
* reachable objects instead would move A, B, and C once each.
|
||||
*
|
||||
* The key is that this dance leaves the objects in order C, B, A - it's
|
||||
* reversed from the original order. On all _subsequent_ scans, none of
|
||||
* them will move. Since most objects aren't in cycles, this can save an
|
||||
* unbounded number of moves across an unbounded number of later collections.
|
||||
* It can cost more only the first time the chain is scanned.
|
||||
*
|
||||
* Drawback: move_unreachable is also used to find out what's still trash
|
||||
* after finalizers may resurrect objects. In _that_ case most unreachable
|
||||
* objects will remain unreachable, so it would be more efficient to move
|
||||
* the reachable objects instead. But this is a one-time cost, probably not
|
||||
* worth complicating the code to speed just a little.
|
||||
*/
|
||||
gc_list_init(unreachable);
|
||||
move_unreachable(base, unreachable); // gc_prev is pointer again
|
||||
|
@ -1197,7 +1225,7 @@ collect(PyThreadState *tstate, int generation,
|
|||
gc_list_merge(young, old);
|
||||
}
|
||||
else {
|
||||
/* We only untrack dicts in full collections, to avoid quadratic
|
||||
/* We only un-track dicts in full collections, to avoid quadratic
|
||||
dict build-up. See issue #14775. */
|
||||
untrack_dicts(young);
|
||||
gcstate->long_lived_pending = 0;
|
||||
|
@ -2269,39 +2297,3 @@ PyObject_GC_Del(void *op)
|
|||
}
|
||||
PyObject_FREE(g);
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------------
|
||||
Notes
|
||||
|
||||
[REACHABLE OR UNREACHABLE?]
|
||||
|
||||
It "sounds slick" to move the unreachable objects, until you think about
|
||||
it - the reason it pays isn't actually obvious.
|
||||
|
||||
Suppose we create objects A, B, C in that order. They appear in the young
|
||||
generation in the same order. If B points to A, and C to B, and C is
|
||||
reachable from outside, then the adjusted refcounts will be 0, 0, and 1
|
||||
respectively.
|
||||
|
||||
When move_unreachable finds A, A is moved to the unreachable list. The
|
||||
same for B when it's first encountered. Then C is traversed, B is moved
|
||||
_back_ to the reachable list. B is eventually traversed, and then A is
|
||||
moved back to the reachable list.
|
||||
|
||||
So instead of not moving at all, the reachable objects B and A are moved
|
||||
twice each. Why is this a win? A straightforward algorithm to move the
|
||||
reachable objects instead would move A, B, and C once each.
|
||||
|
||||
The key is that this dance leaves the objects in order C, B, A - it's
|
||||
reversed from the original order. On all _subsequent_ scans, none of
|
||||
them will move. Since most objects aren't in cycles, this can save an
|
||||
unbounded number of moves across an unbounded number of later collections.
|
||||
It can cost more only the first time the chain is scanned.
|
||||
|
||||
Drawback: move_unreachable is also used to find out what's still trash
|
||||
after finalizers may resurrect objects. In _that_ case most unreachable
|
||||
objects will remain unreachable, so it would be more efficient to move
|
||||
the reachable objects instead. But this is a one-time cost, probably not
|
||||
worth complicating the code to speed just a little.
|
||||
------------------------------------------------------------------------ */
|
||||
|
||||
|
|
Loading…
Reference in New Issue