bpo-42671: Make Python finalization deterministic

Make the Python finalization more deterministic:

* First, clear the __main__ module.
* Then, clear modules from the most recently imported to the least
  recently imported: reversed(sys.modules).
* builtins and sys modules are always cleared last.
* Module attributes are set to None from the most recently defined to the
  least recently defined: reversed(module.__dict__).

Changes:

* finalize_modules() no longer uses a list of weak references to
  modules while clearing sys.modules dict.
* When -vv command line option is used, the module name is now also
  logged, not only the attribute name.
* test_module.test_module_finalization_at_shutdown(): final_a.x is
  now None when final_a.c is cleared.
* test_sys.test_sys_ignores_cleaning_up_user_data(): the exception is
  no longer silently ignored. Rename the test to
  test_sys_cleaning_up_user_data().
* test_threading.test_main_thread_during_shutdown() keeps a reference
  to threading functions since threading module variables are cleared
  before the RefCycle object is deleted by the garbage collector.
This commit is contained in:
Victor Stinner 2020-12-17 12:11:09 +01:00
parent 074ad5123f
commit 9870eb3702
7 changed files with 206 additions and 173 deletions

View File

@ -29,7 +29,7 @@ Py_DEPRECATED(3.2) PyAPI_FUNC(const char *) PyModule_GetFilename(PyObject *);
PyAPI_FUNC(PyObject *) PyModule_GetFilenameObject(PyObject *);
#ifndef Py_LIMITED_API
PyAPI_FUNC(void) _PyModule_Clear(PyObject *);
PyAPI_FUNC(void) _PyModule_ClearDict(PyObject *);
PyAPI_FUNC(void) _PyModule_ClearDict(PyObject *, PyObject *);
PyAPI_FUNC(int) _PyModuleSpec_IsInitializing(PyObject *);
#endif
PyAPI_FUNC(struct PyModuleDef*) PyModule_GetDef(PyObject*);

View File

@ -266,14 +266,15 @@ a = A(destroyed)"""
'{!r} does not end with {!r}'.format(r, ends_with))
def test_module_finalization_at_shutdown(self):
# Module globals and builtins should still be available during shutdown
# Most module globals and builtins should still be available during
# shutdown.
rc, out, err = assert_python_ok("-c", "from test import final_a")
self.assertFalse(err)
lines = out.splitlines()
self.assertEqual(set(lines), {
b"x = a",
b"x = b",
b"final_a.x = a",
b"final_a.x = None",
b"final_b.x = b",
b"len = len",
b"shutil.rmtree = rmtree"})

View File

@ -901,8 +901,8 @@ class SysModuleTest(unittest.TestCase):
self.assertIn(b'sys.flags', out[0])
self.assertIn(b'sys.float_info', out[1])
def test_sys_ignores_cleaning_up_user_data(self):
code = """if 1:
def test_sys_cleaning_up_user_data(self):
code = textwrap.dedent("""
import struct, sys
class C:
@ -912,11 +912,11 @@ class SysModuleTest(unittest.TestCase):
self.pack('I', -42)
sys.x = C()
"""
""")
rc, stdout, stderr = assert_python_ok('-c', code)
self.assertEqual(rc, 0)
self.assertEqual(stdout.rstrip(), b"")
self.assertEqual(stderr.rstrip(), b"")
self.assertIn(b'Exception ignored in: <function C.__del__ at ', stderr)
@unittest.skipUnless(hasattr(sys, 'getandroidapilevel'),
'need sys.getandroidapilevel()')

View File

@ -615,18 +615,23 @@ class ThreadTests(BaseTestCase):
code = """if 1:
import gc, threading
main_thread = threading.current_thread()
assert main_thread is threading.main_thread() # sanity check
class RefCycle:
def __init__(self):
self.cycle = self
self.current_thread = threading.current_thread
self.main_thread = threading.main_thread
self.enumerate = threading.enumerate
self.thread = self.current_thread()
assert self.thread is threading.main_thread() # sanity check
def __del__(self):
print("GC:",
threading.current_thread() is main_thread,
threading.main_thread() is main_thread,
threading.enumerate() == [main_thread])
self.current_thread() is self.thread,
self.main_thread() is self.thread,
self.enumerate() == [self.thread])
RefCycle()
gc.collect() # sanity check

View File

@ -0,0 +1,8 @@
Make the Python finalization more deterministic:
* First, clear the ``__main__`` module.
* Then, clear modules from the most recently imported to the least
recently imported: ``reversed(sys.modules)``.
* :mod:`builtins` and :mod:`sys` modules are always cleared last.
* Module attributes are set to None from the most recently defined to the
least recently defined: ``reversed(module.__dict__)``.

View File

@ -559,14 +559,23 @@ PyModule_GetState(PyObject* m)
void
_PyModule_Clear(PyObject *m)
{
PyObject *d = ((PyModuleObject *)m)->md_dict;
if (d != NULL)
_PyModule_ClearDict(d);
PyModuleObject *module = (PyModuleObject *)m;
_PyModule_ClearDict(module->md_name, module->md_dict);
}
void
_PyModule_ClearDict(PyObject *d)
_PyModule_ClearDict(PyObject *module_name, PyObject *dict)
{
int verbose = _Py_GetConfig()->verbose;
if (verbose) {
PySys_FormatStderr("# cleanup[3] wiping %S module\n", module_name);
}
// If the module has no dict: there is nothing to do.
if (dict == NULL) {
return;
}
/* To make the execution order of destructors for global
objects a bit more predictable, we first zap all objects
whose name starts with a single underscore, before we clear
@ -574,50 +583,64 @@ _PyModule_ClearDict(PyObject *d)
None, rather than deleting them from the dictionary, to
avoid rehashing the dictionary (to some extent). */
Py_ssize_t pos;
PyObject *key, *value;
for (int step=1; step <= 2; step++) {
PyObject *reversed = PyObject_CallOneArg((PyObject*)&PyReversed_Type, dict);
if (reversed == NULL) {
PyErr_WriteUnraisable(NULL);
return;
}
PyObject *keys = PyObject_CallOneArg((PyObject*)&PyList_Type, reversed);
Py_DECREF(reversed);
if (keys == NULL) {
PyErr_WriteUnraisable(NULL);
return;
}
PyObject *iter = PyObject_GetIter(keys);
Py_DECREF(keys);
if (iter == NULL) {
PyErr_WriteUnraisable(NULL);
return;
}
int verbose = _Py_GetConfig()->verbose;
/* First, clear only names starting with a single underscore */
PyObject *key;
while ((key = PyIter_Next(iter))) {
assert(!PyErr_Occurred());
PyObject *value = PyObject_GetItem(dict, key);
Py_XDECREF(value); // only the value pointer is useful
if (value == Py_None) {
continue;
}
if (value == NULL) {
// ignore error
PyErr_Clear();
}
/* First, clear only names starting with a single underscore */
pos = 0;
while (PyDict_Next(d, &pos, &key, &value)) {
if (value != Py_None && PyUnicode_Check(key)) {
if (PyUnicode_READ_CHAR(key, 0) == '_' &&
PyUnicode_READ_CHAR(key, 1) != '_') {
if (verbose > 1) {
const char *s = PyUnicode_AsUTF8(key);
if (s != NULL)
PySys_WriteStderr("# clear[1] %s\n", s);
else
PyErr_Clear();
if (PyUnicode_Check(key)) {
if (step == 1) {
if (PyUnicode_READ_CHAR(key, 0) != '_' ||
PyUnicode_READ_CHAR(key, 1) == '_') {
continue;
}
}
if (PyDict_SetItem(d, key, Py_None) != 0) {
PyErr_WriteUnraisable(NULL);
else {
/* Step 2: clear all names except for __builtins__ */
if (_PyUnicode_EqualToASCIIString(key, "__builtins__")) {
continue;
}
}
}
}
}
/* Next, clear all names except for __builtins__ */
pos = 0;
while (PyDict_Next(d, &pos, &key, &value)) {
if (value != Py_None && PyUnicode_Check(key)) {
if (PyUnicode_READ_CHAR(key, 0) != '_' ||
!_PyUnicode_EqualToASCIIString(key, "__builtins__"))
{
if (verbose > 1) {
const char *s = PyUnicode_AsUTF8(key);
if (s != NULL)
PySys_WriteStderr("# clear[2] %s\n", s);
else
PyErr_Clear();
}
if (PyDict_SetItem(d, key, Py_None) != 0) {
PyErr_WriteUnraisable(NULL);
}
if (verbose > 1) {
PySys_FormatStderr("# clear[%i] %S.%S\n",
step, module_name, key);
}
assert(!PyErr_Occurred());
if (PyDict_SetItem(dict, key, Py_None) != 0) {
PyErr_WriteUnraisable(NULL);
}
Py_DECREF(key);
}
Py_DECREF(iter);
}
/* Note: we leave __builtins__ in place, so that destructors

View File

@ -1216,8 +1216,10 @@ _Py_InitializeMain(void)
static void
finalize_modules_delete_special(PyThreadState *tstate, int verbose)
finalize_modules_delete_special(PyThreadState *tstate)
{
int verbose = _PyInterpreterState_GetConfig(tstate->interp)->verbose;
// List of names to clear in sys
static const char * const sys_deletes[] = {
"path", "argv", "ps1", "ps2",
@ -1272,75 +1274,99 @@ finalize_modules_delete_special(PyThreadState *tstate, int verbose)
}
static PyObject*
finalize_remove_modules(PyObject *modules, int verbose)
static void
finalize_wipe_module(PyInterpreterState *interp, PyObject *mod, PyObject *name,
int verbose)
{
PyObject *weaklist = PyList_New(0);
if (weaklist == NULL) {
PyErr_WriteUnraisable(NULL);
assert(PyModule_Check(mod));
PyObject *dict = PyModule_GetDict(mod);
if (dict == interp->builtins || dict == interp->sysdict) {
return;
}
#define STORE_MODULE_WEAKREF(name, mod) \
if (weaklist != NULL) { \
PyObject *wr = PyWeakref_NewRef(mod, NULL); \
if (wr) { \
PyObject *tup = PyTuple_Pack(2, name, wr); \
if (!tup || PyList_Append(weaklist, tup) < 0) { \
PyErr_WriteUnraisable(NULL); \
} \
Py_XDECREF(tup); \
Py_DECREF(wr); \
} \
else { \
PyErr_WriteUnraisable(NULL); \
} \
}
_PyModule_Clear(mod);
}
static void
finalize_remove_modules(PyThreadState *tstate)
{
PyInterpreterState *interp = tstate->interp;
int verbose = _PyInterpreterState_GetConfig(interp)->verbose;
#define CLEAR_MODULE(name, mod) \
do { \
if (PyModule_Check(mod)) { \
if (verbose && PyUnicode_Check(name)) { \
PySys_FormatStderr("# cleanup[2] removing %U\n", name); \
} \
STORE_MODULE_WEAKREF(name, mod); \
if (PyObject_SetItem(modules, name, Py_None) < 0) { \
PyErr_WriteUnraisable(NULL); \
} \
}
finalize_wipe_module(interp, mod, name, verbose); \
} \
if (PyObject_SetItem(modules, name, Py_None) < 0) { \
PyErr_WriteUnraisable(NULL); \
} \
} while (0)
if (PyDict_CheckExact(modules)) {
Py_ssize_t pos = 0;
PyObject *key, *value;
while (PyDict_Next(modules, &pos, &key, &value)) {
CLEAR_MODULE(key, value);
}
}
else {
PyObject *iterator = PyObject_GetIter(modules);
if (iterator == NULL) {
PyErr_WriteUnraisable(NULL);
PyObject *modules = interp->modules;
// Wipe the __main__ module
PyObject *main_name = PyUnicode_FromString("__main__");
if (main_name != NULL) {
PyObject *main_mod = PyObject_GetItem(modules, main_name);
if (main_mod != NULL) {
CLEAR_MODULE(main_name, main_mod);
Py_DECREF(main_mod);
// Break reference cycles (if any)
_PyGC_CollectNoFail(tstate);
}
else {
PyObject *key;
while ((key = PyIter_Next(iterator))) {
PyObject *value = PyObject_GetItem(modules, key);
if (value == NULL) {
PyErr_WriteUnraisable(NULL);
continue;
}
CLEAR_MODULE(key, value);
Py_DECREF(value);
Py_DECREF(key);
}
if (PyErr_Occurred()) {
PyErr_WriteUnraisable(NULL);
}
Py_DECREF(iterator);
PyErr_Clear();
}
Py_DECREF(main_name);
}
else {
PyErr_Clear();
}
#undef CLEAR_MODULE
#undef STORE_MODULE_WEAKREF
return weaklist;
PyObject *reversed = PyObject_CallOneArg((PyObject*)&PyReversed_Type, modules);
if (reversed == NULL) {
PyErr_WriteUnraisable(NULL);
return;
}
PyObject *keys = PyObject_CallOneArg((PyObject*)&PyList_Type, reversed);
Py_DECREF(reversed);
if (keys == NULL) {
PyErr_WriteUnraisable(NULL);
return;
}
PyObject *iter = PyObject_GetIter(keys);
Py_DECREF(keys);
if (iter == NULL) {
PyErr_WriteUnraisable(NULL);
return;
}
PyObject *name;
while ((name = PyIter_Next(iter))) {
PyObject *mod = PyObject_GetItem(modules, name);
if (mod == NULL) {
PyErr_WriteUnraisable(NULL);
continue;
}
PyObject *dict = PyModule_Check(mod) ? PyModule_GetDict(mod) : NULL;
// builtins and sys modules are cleared later
if (dict != interp->builtins && dict != interp->sysdict) {
CLEAR_MODULE(name, mod);
}
Py_DECREF(mod);
Py_DECREF(name);
}
if (PyErr_Occurred()) {
PyErr_WriteUnraisable(NULL);
}
Py_DECREF(iter);
#undef CLEAR_MODULE
}
@ -1376,46 +1402,27 @@ finalize_restore_builtins(PyThreadState *tstate)
static void
finalize_modules_clear_weaklist(PyInterpreterState *interp,
PyObject *weaklist, int verbose)
{
// First clear modules imported later
for (Py_ssize_t i = PyList_GET_SIZE(weaklist) - 1; i >= 0; i--) {
PyObject *tup = PyList_GET_ITEM(weaklist, i);
PyObject *name = PyTuple_GET_ITEM(tup, 0);
PyObject *mod = PyWeakref_GET_OBJECT(PyTuple_GET_ITEM(tup, 1));
if (mod == Py_None) {
continue;
}
assert(PyModule_Check(mod));
PyObject *dict = PyModule_GetDict(mod);
if (dict == interp->builtins || dict == interp->sysdict) {
continue;
}
Py_INCREF(mod);
if (verbose && PyUnicode_Check(name)) {
PySys_FormatStderr("# cleanup[3] wiping %U\n", name);
}
_PyModule_Clear(mod);
Py_DECREF(mod);
}
}
static void
finalize_clear_sys_builtins_dict(PyInterpreterState *interp, int verbose)
finalize_clear_sys_builtins_dict(PyInterpreterState *interp)
{
// Clear sys dict
if (verbose) {
PySys_FormatStderr("# cleanup[3] wiping sys\n");
PyObject *name = PyUnicode_FromString("sys");
if (name != NULL) {
_PyModule_ClearDict(name, interp->sysdict);
Py_DECREF(name);
}
else {
PyErr_WriteUnraisable(NULL);
}
_PyModule_ClearDict(interp->sysdict);
// Clear builtins dict
if (verbose) {
PySys_FormatStderr("# cleanup[3] wiping builtins\n");
name = PyUnicode_FromString("builtins");
if (name != NULL) {
_PyModule_ClearDict(name, interp->builtins);
Py_DECREF(name);
}
else {
PyErr_WriteUnraisable(NULL);
}
_PyModule_ClearDict(interp->builtins);
}
@ -1424,12 +1431,10 @@ static void
finalize_modules(PyThreadState *tstate)
{
PyInterpreterState *interp = tstate->interp;
PyObject *modules = interp->modules;
if (modules == NULL) {
if (interp->modules == NULL) {
// Already done
return;
}
int verbose = _PyInterpreterState_GetConfig(interp)->verbose;
// Delete some special builtins._ and sys attributes first. These are
// common places where user values hide and people complain when their
@ -1438,19 +1443,29 @@ finalize_modules(PyThreadState *tstate)
// destruction order. Sigh.
//
// XXX Perhaps these precautions are obsolete. Who knows?
finalize_modules_delete_special(tstate, verbose);
finalize_modules_delete_special(tstate);
// Remove all modules from sys.modules, hoping that garbage collection
// can reclaim most of them: set all sys.modules values to None.
//
// We prepare a list which will receive (name, weakref) tuples of
// modules when they are removed from sys.modules. The name is used
// for diagnosis messages (in verbose mode), while the weakref helps
// detect those modules which have been held alive.
PyObject *weaklist = finalize_remove_modules(modules, verbose);
// * First, clear the __main__ module.
// * Then, clear modules from the most recently imported to the least
// recently imported: reversed(sys.modules).
// * Exception: leave builtins and sys modules unchanged.
// * Module attributes are set to None from the most recently defined to the
// least recently defined: reversed(module.__dict__).
//
// The special treatment of "builtins" here is because even
// when it's not referenced as a module, its dictionary is
// referenced by almost every module's __builtins__. Since
// deleting a module clears its dictionary (even if there are
// references left to it), we need to delete the "builtins"
// module last. Likewise, we don't delete sys until the very
// end because it is implicitly referenced (e.g. by print).
finalize_remove_modules(tstate);
// Clear the modules dict
finalize_clear_modules_dict(modules);
finalize_clear_modules_dict(interp->modules);
// Restore the original builtins dict, to ensure that any
// user data gets cleared.
@ -1463,27 +1478,8 @@ finalize_modules(PyThreadState *tstate)
// machinery.
_PyGC_DumpShutdownStats(tstate);
if (weaklist != NULL) {
// Now, if there are any modules left alive, clear their globals to
// minimize potential leaks. All C extension modules actually end
// up here, since they are kept alive in the interpreter state.
//
// The special treatment of "builtins" here is because even
// when it's not referenced as a module, its dictionary is
// referenced by almost every module's __builtins__. Since
// deleting a module clears its dictionary (even if there are
// references left to it), we need to delete the "builtins"
// module last. Likewise, we don't delete sys until the very
// end because it is implicitly referenced (e.g. by print).
//
// Since dict is ordered in CPython 3.6+, modules are saved in
// importing order. First clear modules imported later.
finalize_modules_clear_weaklist(interp, weaklist, verbose);
Py_DECREF(weaklist);
}
// Clear sys and builtins modules dict
finalize_clear_sys_builtins_dict(interp, verbose);
finalize_clear_sys_builtins_dict(interp);
// Clear module dict copies stored in the interpreter state:
// clear PyInterpreterState.modules_by_index and