Fix typos in gcmodule.c and restructure comments for clarity (GH-17983)

This commit is contained in:
Pablo Galindo 2020-01-13 12:25:05 +00:00 committed by GitHub
parent d23f78267a
commit 97f1267a54
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 36 additions and 44 deletions

View File

@ -609,7 +609,7 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)
// NEXT_MASK_UNREACHABLE flag, we set it unconditionally.
// But this may pollute the unreachable list head's 'next' pointer
// too. That's semantically senseless but expedient here - the
// damage is repaired when this fumction ends.
// damage is repaired when this function ends.
last->_gc_next = (NEXT_MASK_UNREACHABLE | (uintptr_t)gc);
_PyGCHead_SET_PREV(gc, last);
gc->_gc_next = (NEXT_MASK_UNREACHABLE | (uintptr_t)unreachable);
@ -1039,7 +1039,7 @@ clear_freelists(void)
(void)PyContext_ClearFreeList();
}
// Show stats for objects in each gennerations.
// Show stats for objects in each generation.
static void
show_stats_each_generations(GCState *gcstate)
{
@ -1058,17 +1058,17 @@ show_stats_each_generations(GCState *gcstate)
buf, gc_list_size(&gcstate->permanent_generation.head));
}
/* Deduce wich objects among "base" are unreachable from outside the list
/* Deduce which objects among "base" are unreachable from outside the list
and move them to 'unreachable'. The process consists of the following steps:
1. Copy all reference counts to a different field (gc_prev is used to hold
this copy to save memory).
2. Traverse all objects in "base" and visit all referred objects using
"tp_traverse" and for every visited object, substract 1 to the reference
"tp_traverse" and for every visited object, subtract 1 from the reference
count (the one that we copied in the previous step). After this step, all
objects that can be reached directly from outside must have strictly positive
reference count, while all unreachable objects must have a count of exactly 0.
3. Indentify all unreachable objects (the ones with 0 reference count) and move
3. Identify all unreachable objects (the ones with 0 reference count) and move
them to the "unreachable" list. This step also needs to move back to "base" all
objects that were initially marked as unreachable but are referred transitively
by the reachable objects (the ones with strictly positive reference count).
@ -1098,10 +1098,38 @@ deduce_unreachable(PyGC_Head *base, PyGC_Head *unreachable) {
/* Leave everything reachable from outside base in base, and move
* everything else (in base) to unreachable.
*
* NOTE: This used to move the reachable objects into a reachable
* set instead. But most things usually turn out to be reachable,
* so it's more efficient to move the unreachable things. See note
^ [REACHABLE OR UNREACHABLE?] at the file end.
* so it's more efficient to move the unreachable things. It "sounds slick"
* to move the unreachable objects, until you think about it - the reason it
* pays isn't actually obvious.
*
* Suppose we create objects A, B, C in that order. They appear in the young
* generation in the same order. If B points to A, and C to B, and C is
* reachable from outside, then the adjusted refcounts will be 0, 0, and 1
* respectively.
*
* When move_unreachable finds A, A is moved to the unreachable list. The
* same for B when it's first encountered. Then C is traversed, B is moved
* _back_ to the reachable list. B is eventually traversed, and then A is
* moved back to the reachable list.
*
* So instead of not moving at all, the reachable objects B and A are moved
* twice each. Why is this a win? A straightforward algorithm to move the
* reachable objects instead would move A, B, and C once each.
*
* The key is that this dance leaves the objects in order C, B, A - it's
* reversed from the original order. On all _subsequent_ scans, none of
* them will move. Since most objects aren't in cycles, this can save an
* unbounded number of moves across an unbounded number of later collections.
* It can cost more only the first time the chain is scanned.
*
* Drawback: move_unreachable is also used to find out what's still trash
* after finalizers may resurrect objects. In _that_ case most unreachable
* objects will remain unreachable, so it would be more efficient to move
* the reachable objects instead. But this is a one-time cost, probably not
* worth complicating the code to speed just a little.
*/
gc_list_init(unreachable);
move_unreachable(base, unreachable); // gc_prev is pointer again
@ -1197,7 +1225,7 @@ collect(PyThreadState *tstate, int generation,
gc_list_merge(young, old);
}
else {
/* We only untrack dicts in full collections, to avoid quadratic
/* We only un-track dicts in full collections, to avoid quadratic
dict build-up. See issue #14775. */
untrack_dicts(young);
gcstate->long_lived_pending = 0;
@ -2269,39 +2297,3 @@ PyObject_GC_Del(void *op)
}
PyObject_FREE(g);
}
/* ------------------------------------------------------------------------
Notes
[REACHABLE OR UNREACHABLE?]
It "sounds slick" to move the unreachable objects, until you think about
it - the reason it pays isn't actually obvious.
Suppose we create objects A, B, C in that order. They appear in the young
generation in the same order. If B points to A, and C to B, and C is
reachable from outside, then the adjusted refcounts will be 0, 0, and 1
respectively.
When move_unreachable finds A, A is moved to the unreachable list. The
same for B when it's first encountered. Then C is traversed, B is moved
_back_ to the reachable list. B is eventually traversed, and then A is
moved back to the reachable list.
So instead of not moving at all, the reachable objects B and A are moved
twice each. Why is this a win? A straightforward algorithm to move the
reachable objects instead would move A, B, and C once each.
The key is that this dance leaves the objects in order C, B, A - it's
reversed from the original order. On all _subsequent_ scans, none of
them will move. Since most objects aren't in cycles, this can save an
unbounded number of moves across an unbounded number of later collections.
It can cost more only the first time the chain is scanned.
Drawback: move_unreachable is also used to find out what's still trash
after finalizers may resurrect objects. In _that_ case most unreachable
objects will remain unreachable, so it would be more efficient to move
the reachable objects instead. But this is a one-time cost, probably not
worth complicating the code to speed just a little.
------------------------------------------------------------------------ */