Issue #10576: Add a progress callback to gcmodule

This commit is contained in:
Kristján Valur Jónsson 2012-04-15 11:41:32 +00:00
parent c014df7edf
commit 69c635266e
4 changed files with 249 additions and 9 deletions

View File

@ -153,8 +153,8 @@ The :mod:`gc` module provides the following functions:
.. versionadded:: 3.1
The following variable is provided for read-only access (you can mutate its
value but should not rebind it):
The following variables are provided for read-only access (you can mutate the
values but should not rebind them):
.. data:: garbage
@ -183,6 +183,41 @@ value but should not rebind it):
:const:`DEBUG_UNCOLLECTABLE` is set, in addition all uncollectable objects
are printed.
.. data:: callbacks
A list of callbacks that will be invoked by the garbage collector before and
after collection. The callbacks will be called with two arguments,
:arg:`phase` and :arg:`info`.
:arg:`phase` can one of two values:
"start": The garbage collection is about to start.
"stop": The garbage collection has finished.
:arg:`info` provides more information for the callback. The following
keys are currently defined:
"generation": The oldest generation being collected.
"collected": When :arg:`phase` is "stop", the number of objects
successfully collected.
"uncollectable": when :arg:`phase` is "stop", the number of objects
that could not be collected and were put in :data:`garbage`.
Applications can add their own callbacks to this list. The primary
use cases are:
Gathering statistics about garbage collection, such as how often
various generations are collected, and how long the collection
takes.
Allowing applications to identify and clear their own uncollectable
types when they appear in :data:`garbage`.
.. versionadded:: 3.3
The following constants are provided for use with :func:`set_debug`:

View File

@ -32,6 +32,20 @@ class GC_Detector(object):
# gc collects it.
self.wr = weakref.ref(C1055820(666), it_happened)
class Uncollectable(object):
"""Create a reference cycle with multiple __del__ methods.
An object in a reference cycle will never have zero references,
and so must be garbage collected. If one or more objects in the
cycle have __del__ methods, the gc refuses to guess an order,
and leaves the cycle uncollected."""
def __init__(self, partner=None):
if partner is None:
self.partner = Uncollectable(partner=self)
else:
self.partner = partner
def __del__(self):
pass
### Tests
###############################################################################
@ -528,6 +542,126 @@ class GCTests(unittest.TestCase):
self.assertNotIn(b"uncollectable objects at shutdown", stderr)
class GCCallbackTests(unittest.TestCase):
def setUp(self):
# Save gc state and disable it.
self.enabled = gc.isenabled()
gc.disable()
self.debug = gc.get_debug()
gc.set_debug(0)
gc.callbacks.append(self.cb1)
gc.callbacks.append(self.cb2)
def tearDown(self):
# Restore gc state
del self.visit
gc.callbacks.remove(self.cb1)
gc.callbacks.remove(self.cb2)
gc.set_debug(self.debug)
if self.enabled:
gc.enable()
# destroy any uncollectables
gc.collect()
for obj in gc.garbage:
if isinstance(obj, Uncollectable):
obj.partner = None
del gc.garbage[:]
gc.collect()
othergarbage = []
def preclean(self):
# Remove all fluff from the system. Invoke this function
# manually rather than through self.setUp() for maximum
# safety.
self.visit = []
gc.collect()
garbage, gc.garbage[:] = gc.garbage[:], []
self.othergarbage.append(garbage)
self.visit = []
def cb1(self, phase, info):
self.visit.append((1, phase, dict(info)))
def cb2(self, phase, info):
self.visit.append((2, phase, dict(info)))
if phase == "stop" and hasattr(self, "cleanup"):
# Clean Uncollectable from garbage
uc = [e for e in gc.garbage if isinstance(e, Uncollectable)]
gc.garbage[:] = [e for e in gc.garbage
if not isinstance(e, Uncollectable)]
for e in uc:
e.partner = None
def testCollect(self):
self.preclean()
gc.collect()
# Algorithmically verify the contents of self.visit
# because it is long and tortuous.
# Count the number of visits to each callback
n = [v[0] for v in self.visit]
n1 = [i for i in n if i == 1]
n2 = [i for i in n if i == 2]
self.assertEqual(n1, [1]*2)
self.assertEqual(n2, [2]*2)
# Count that we got the right number of start and stop callbacks.
n = [v[1] for v in self.visit]
n1 = [i for i in n if i == "start"]
n2 = [i for i in n if i == "stop"]
self.assertEqual(n1, ["start"]*2)
self.assertEqual(n2, ["stop"]*2)
# Check that we got the right info dict for all callbacks
for v in self.visit:
info = v[2]
self.assertTrue("generation" in info)
self.assertTrue("collected" in info)
self.assertTrue("uncollectable" in info)
def testCollectGen(self):
self.preclean()
gc.collect(2)
for v in self.visit:
info = v[2]
self.assertEqual(info["generation"], 2)
def testCollectGarbage(self):
self.preclean()
# Each of these cause four objects to be garbage: Two
# Uncolectables and their instance dicts.
Uncollectable()
Uncollectable()
C1055820(666)
gc.collect()
for v in self.visit:
if v[1] != "stop":
continue
info = v[2]
self.assertEqual(info["collected"], 2)
self.assertEqual(info["uncollectable"], 8)
# We should now have the Uncollectables in gc.garbage
self.assertEqual(len(gc.garbage), 4)
for e in gc.garbage:
self.assertIsInstance(e, Uncollectable)
# Now, let our callback handle the Uncollectable instances
self.cleanup=True
self.visit = []
gc.garbage[:] = []
gc.collect()
for v in self.visit:
if v[1] != "stop":
continue
info = v[2]
self.assertEqual(info["collected"], 0)
self.assertEqual(info["uncollectable"], 4)
# Uncollectables should be gone
self.assertEqual(len(gc.garbage), 0)
class GCTogglingTests(unittest.TestCase):
def setUp(self):
gc.enable()
@ -681,7 +815,7 @@ def test_main():
try:
gc.collect() # Delete 2nd generation garbage
run_unittest(GCTests, GCTogglingTests)
run_unittest(GCTests, GCTogglingTests, GCCallbackTests)
finally:
gc.set_debug(debug)
# test gc.enable() even if GC is disabled by default

View File

@ -275,6 +275,9 @@ Library
- Issue #14310: Sockets can now be with other processes on Windows using
the api socket.socket.share() and socket.fromshare().
- Issue #10576: The gc module now has a 'callbacks' member that will get
called when garbage collection takes place.
Build
-----

View File

@ -65,14 +65,17 @@ static PyObject *garbage = NULL;
/* Python string to use if unhandled exception occurs */
static PyObject *gc_str = NULL;
/* This is the number of objects who survived the last full collection. It
/* a list of callbacks to be invoked when collection is performed */
static PyObject *callbacks = NULL;
/* This is the number of objects that survived the last full collection. It
approximates the number of long lived objects tracked by the GC.
(by "full collection", we mean a collection of the oldest generation).
*/
static Py_ssize_t long_lived_total = 0;
/* This is the number of objects who survived all "non-full" collections,
/* This is the number of objects that survived all "non-full" collections,
and are awaiting to undergo a full collection for the first time.
*/
@ -787,7 +790,7 @@ get_time(void)
/* This is the main function. Read this to understand how the
* collection process works. */
static Py_ssize_t
collect(int generation)
collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable)
{
int i;
Py_ssize_t m = 0; /* # objects collected */
@ -935,9 +938,64 @@ collect(int generation)
PyErr_WriteUnraisable(gc_str);
Py_FatalError("unexpected exception during garbage collection");
}
if (n_collected)
*n_collected = m;
if (n_uncollectable)
*n_uncollectable = n;
return n+m;
}
/* Invoke progress callbacks to notify clients that garbage collection
* is starting or stopping
*/
static void
invoke_gc_callback(const char *phase, int generation,
Py_ssize_t collected, Py_ssize_t uncollectable)
{
Py_ssize_t i;
PyObject *info = NULL;
/* we may get called very early */
if (callbacks == NULL)
return;
/* The local variable cannot be rebound, check it for sanity */
assert(callbacks != NULL && PyList_CheckExact(callbacks));
if (PyList_GET_SIZE(callbacks) != 0) {
info = Py_BuildValue("{sisnsn}",
"generation", generation,
"collected", collected,
"uncollectable", uncollectable);
if (info == NULL) {
PyErr_WriteUnraisable(NULL);
return;
}
}
for (i=0; i<PyList_GET_SIZE(callbacks); i++) {
PyObject *r, *cb = PyList_GET_ITEM(callbacks, i);
Py_INCREF(cb); /* make sure cb doesn't go away */
r = PyObject_CallFunction(cb, "sO", phase, info);
Py_XDECREF(r);
if (r == NULL)
PyErr_WriteUnraisable(cb);
Py_DECREF(cb);
}
Py_XDECREF(info);
}
/* Perform garbage collection of a generation and invoke
* progress callbacks.
*/
static Py_ssize_t
collect_with_callback(int generation)
{
Py_ssize_t result, collected, uncollectable;
invoke_gc_callback("start", generation, 0, 0);
result = collect(generation, &collected, &uncollectable);
invoke_gc_callback("stop", generation, collected, uncollectable);
return result;
}
static Py_ssize_t
collect_generations(void)
{
@ -956,7 +1014,7 @@ collect_generations(void)
if (i == NUM_GENERATIONS - 1
&& long_lived_pending < long_lived_total / 4)
continue;
n = collect(i);
n = collect_with_callback(i);
break;
}
}
@ -1027,7 +1085,7 @@ gc_collect(PyObject *self, PyObject *args, PyObject *kws)
n = 0; /* already collecting, don't do anything */
else {
collecting = 1;
n = collect(genarg);
n = collect_with_callback(genarg);
collecting = 0;
}
@ -1320,6 +1378,15 @@ PyInit_gc(void)
if (PyModule_AddObject(m, "garbage", garbage) < 0)
return NULL;
if (callbacks == NULL) {
callbacks = PyList_New(0);
if (callbacks == NULL)
return NULL;
}
Py_INCREF(callbacks);
if (PyModule_AddObject(m, "callbacks", callbacks) < 0)
return NULL;
/* Importing can't be done in collect() because collect()
* can be called via PyGC_Collect() in Py_Finalize().
* This wouldn't be a problem, except that <initialized> is
@ -1352,7 +1419,7 @@ PyGC_Collect(void)
n = 0; /* already collecting, don't do anything */
else {
collecting = 1;
n = collect(NUM_GENERATIONS - 1);
n = collect_with_callback(NUM_GENERATIONS - 1);
collecting = 0;
}
@ -1389,6 +1456,7 @@ _PyGC_Fini(void)
Py_XDECREF(bytes);
}
}
Py_CLEAR(callbacks);
}
/* for debugging */