bpo-42671: Make Python finalization deterministic

Make the Python finalization more deterministic:

* First, clear the __main__ module.
* Then, clear modules from the most recently imported to the least
  recently imported: reversed(sys.modules).
* builtins and sys modules are always cleared last.
* Module attributes are set to None from the most recently defined to the
  least recently defined: reversed(module.__dict__).

Changes:

* finalize_modules() no longer uses a list of weak references to
  modules while clearing sys.modules dict.
* When the -vv command line option is used, the module name is now also
  logged, not only the attribute name.
* test_module.test_module_finalization_at_shutdown(): final_a.x is
  now None when final_a.c is cleared.
* test_sys.test_sys_ignores_cleaning_up_user_data(): the exception is
  no longer silently ignored. Rename the test to
  test_sys_cleaning_up_user_data().
* test_threading.test_main_thread_during_shutdown() keeps a reference
  to threading functions, since the threading module variables are cleared
  before the RefCycle object is deleted by the garbage collector.
This commit is contained in:
Victor Stinner 2020-12-17 12:11:09 +01:00
parent 074ad5123f
commit 9870eb3702
7 changed files with 206 additions and 173 deletions

View File

@ -29,7 +29,7 @@ Py_DEPRECATED(3.2) PyAPI_FUNC(const char *) PyModule_GetFilename(PyObject *);
PyAPI_FUNC(PyObject *) PyModule_GetFilenameObject(PyObject *); PyAPI_FUNC(PyObject *) PyModule_GetFilenameObject(PyObject *);
#ifndef Py_LIMITED_API #ifndef Py_LIMITED_API
PyAPI_FUNC(void) _PyModule_Clear(PyObject *); PyAPI_FUNC(void) _PyModule_Clear(PyObject *);
PyAPI_FUNC(void) _PyModule_ClearDict(PyObject *); PyAPI_FUNC(void) _PyModule_ClearDict(PyObject *, PyObject *);
PyAPI_FUNC(int) _PyModuleSpec_IsInitializing(PyObject *); PyAPI_FUNC(int) _PyModuleSpec_IsInitializing(PyObject *);
#endif #endif
PyAPI_FUNC(struct PyModuleDef*) PyModule_GetDef(PyObject*); PyAPI_FUNC(struct PyModuleDef*) PyModule_GetDef(PyObject*);

View File

@ -266,14 +266,15 @@ a = A(destroyed)"""
'{!r} does not end with {!r}'.format(r, ends_with)) '{!r} does not end with {!r}'.format(r, ends_with))
def test_module_finalization_at_shutdown(self): def test_module_finalization_at_shutdown(self):
# Module globals and builtins should still be available during shutdown # Most module globals and builtins should still be available during
# shutdown.
rc, out, err = assert_python_ok("-c", "from test import final_a") rc, out, err = assert_python_ok("-c", "from test import final_a")
self.assertFalse(err) self.assertFalse(err)
lines = out.splitlines() lines = out.splitlines()
self.assertEqual(set(lines), { self.assertEqual(set(lines), {
b"x = a", b"x = a",
b"x = b", b"x = b",
b"final_a.x = a", b"final_a.x = None",
b"final_b.x = b", b"final_b.x = b",
b"len = len", b"len = len",
b"shutil.rmtree = rmtree"}) b"shutil.rmtree = rmtree"})

View File

@ -901,8 +901,8 @@ class SysModuleTest(unittest.TestCase):
self.assertIn(b'sys.flags', out[0]) self.assertIn(b'sys.flags', out[0])
self.assertIn(b'sys.float_info', out[1]) self.assertIn(b'sys.float_info', out[1])
def test_sys_ignores_cleaning_up_user_data(self): def test_sys_cleaning_up_user_data(self):
code = """if 1: code = textwrap.dedent("""
import struct, sys import struct, sys
class C: class C:
@ -912,11 +912,11 @@ class SysModuleTest(unittest.TestCase):
self.pack('I', -42) self.pack('I', -42)
sys.x = C() sys.x = C()
""" """)
rc, stdout, stderr = assert_python_ok('-c', code) rc, stdout, stderr = assert_python_ok('-c', code)
self.assertEqual(rc, 0) self.assertEqual(rc, 0)
self.assertEqual(stdout.rstrip(), b"") self.assertEqual(stdout.rstrip(), b"")
self.assertEqual(stderr.rstrip(), b"") self.assertIn(b'Exception ignored in: <function C.__del__ at ', stderr)
@unittest.skipUnless(hasattr(sys, 'getandroidapilevel'), @unittest.skipUnless(hasattr(sys, 'getandroidapilevel'),
'need sys.getandroidapilevel()') 'need sys.getandroidapilevel()')

View File

@ -615,18 +615,23 @@ class ThreadTests(BaseTestCase):
code = """if 1: code = """if 1:
import gc, threading import gc, threading
main_thread = threading.current_thread()
assert main_thread is threading.main_thread() # sanity check
class RefCycle: class RefCycle:
def __init__(self): def __init__(self):
self.cycle = self self.cycle = self
self.current_thread = threading.current_thread
self.main_thread = threading.main_thread
self.enumerate = threading.enumerate
self.thread = self.current_thread()
assert self.thread is threading.main_thread() # sanity check
def __del__(self): def __del__(self):
print("GC:", print("GC:",
threading.current_thread() is main_thread, self.current_thread() is self.thread,
threading.main_thread() is main_thread, self.main_thread() is self.thread,
threading.enumerate() == [main_thread]) self.enumerate() == [self.thread])
RefCycle() RefCycle()
gc.collect() # sanity check gc.collect() # sanity check

View File

@ -0,0 +1,8 @@
Make the Python finalization more deterministic:
* First, clear the ``__main__`` module.
* Then, clear modules from the most recently imported to the least
recently imported: ``reversed(sys.modules)``.
* :mod:`builtins` and :mod:`sys` modules are always cleared last.
* Module attributes are set to None from the most recently defined to the
least recently defined: ``reversed(module.__dict__)``.

View File

@ -559,14 +559,23 @@ PyModule_GetState(PyObject* m)
void void
_PyModule_Clear(PyObject *m) _PyModule_Clear(PyObject *m)
{ {
PyObject *d = ((PyModuleObject *)m)->md_dict; PyModuleObject *module = (PyModuleObject *)m;
if (d != NULL) _PyModule_ClearDict(module->md_name, module->md_dict);
_PyModule_ClearDict(d);
} }
void void
_PyModule_ClearDict(PyObject *d) _PyModule_ClearDict(PyObject *module_name, PyObject *dict)
{ {
int verbose = _Py_GetConfig()->verbose;
if (verbose) {
PySys_FormatStderr("# cleanup[3] wiping %S module\n", module_name);
}
// If the module has no dict: there is nothing to do.
if (dict == NULL) {
return;
}
/* To make the execution order of destructors for global /* To make the execution order of destructors for global
objects a bit more predictable, we first zap all objects objects a bit more predictable, we first zap all objects
whose name starts with a single underscore, before we clear whose name starts with a single underscore, before we clear
@ -574,50 +583,64 @@ _PyModule_ClearDict(PyObject *d)
None, rather than deleting them from the dictionary, to None, rather than deleting them from the dictionary, to
avoid rehashing the dictionary (to some extent). */ avoid rehashing the dictionary (to some extent). */
Py_ssize_t pos; for (int step=1; step <= 2; step++) {
PyObject *key, *value; PyObject *reversed = PyObject_CallOneArg((PyObject*)&PyReversed_Type, dict);
if (reversed == NULL) {
int verbose = _Py_GetConfig()->verbose; PyErr_WriteUnraisable(NULL);
return;
}
PyObject *keys = PyObject_CallOneArg((PyObject*)&PyList_Type, reversed);
Py_DECREF(reversed);
if (keys == NULL) {
PyErr_WriteUnraisable(NULL);
return;
}
PyObject *iter = PyObject_GetIter(keys);
Py_DECREF(keys);
if (iter == NULL) {
PyErr_WriteUnraisable(NULL);
return;
}
/* First, clear only names starting with a single underscore */ /* First, clear only names starting with a single underscore */
pos = 0; PyObject *key;
while (PyDict_Next(d, &pos, &key, &value)) { while ((key = PyIter_Next(iter))) {
if (value != Py_None && PyUnicode_Check(key)) { assert(!PyErr_Occurred());
if (PyUnicode_READ_CHAR(key, 0) == '_' && PyObject *value = PyObject_GetItem(dict, key);
PyUnicode_READ_CHAR(key, 1) != '_') { Py_XDECREF(value); // only the value pointer is useful
if (verbose > 1) { if (value == Py_None) {
const char *s = PyUnicode_AsUTF8(key); continue;
if (s != NULL) }
PySys_WriteStderr("# clear[1] %s\n", s); if (value == NULL) {
else // ignore error
PyErr_Clear(); PyErr_Clear();
} }
if (PyDict_SetItem(d, key, Py_None) != 0) {
PyErr_WriteUnraisable(NULL);
}
}
}
}
/* Next, clear all names except for __builtins__ */ if (PyUnicode_Check(key)) {
pos = 0; if (step == 1) {
while (PyDict_Next(d, &pos, &key, &value)) {
if (value != Py_None && PyUnicode_Check(key)) {
if (PyUnicode_READ_CHAR(key, 0) != '_' || if (PyUnicode_READ_CHAR(key, 0) != '_' ||
!_PyUnicode_EqualToASCIIString(key, "__builtins__")) PyUnicode_READ_CHAR(key, 1) == '_') {
{ continue;
if (verbose > 1) {
const char *s = PyUnicode_AsUTF8(key);
if (s != NULL)
PySys_WriteStderr("# clear[2] %s\n", s);
else
PyErr_Clear();
} }
if (PyDict_SetItem(d, key, Py_None) != 0) { }
else {
/* Step 2: clear all names except for __builtins__ */
if (_PyUnicode_EqualToASCIIString(key, "__builtins__")) {
continue;
}
}
}
if (verbose > 1) {
PySys_FormatStderr("# clear[%i] %S.%S\n",
step, module_name, key);
}
assert(!PyErr_Occurred());
if (PyDict_SetItem(dict, key, Py_None) != 0) {
PyErr_WriteUnraisable(NULL); PyErr_WriteUnraisable(NULL);
} }
Py_DECREF(key);
} }
} Py_DECREF(iter);
} }
/* Note: we leave __builtins__ in place, so that destructors /* Note: we leave __builtins__ in place, so that destructors

View File

@ -1216,8 +1216,10 @@ _Py_InitializeMain(void)
static void static void
finalize_modules_delete_special(PyThreadState *tstate, int verbose) finalize_modules_delete_special(PyThreadState *tstate)
{ {
int verbose = _PyInterpreterState_GetConfig(tstate->interp)->verbose;
// List of names to clear in sys // List of names to clear in sys
static const char * const sys_deletes[] = { static const char * const sys_deletes[] = {
"path", "argv", "ps1", "ps2", "path", "argv", "ps1", "ps2",
@ -1272,75 +1274,99 @@ finalize_modules_delete_special(PyThreadState *tstate, int verbose)
} }
static PyObject* static void
finalize_remove_modules(PyObject *modules, int verbose) finalize_wipe_module(PyInterpreterState *interp, PyObject *mod, PyObject *name,
int verbose)
{ {
PyObject *weaklist = PyList_New(0); assert(PyModule_Check(mod));
if (weaklist == NULL) { PyObject *dict = PyModule_GetDict(mod);
PyErr_WriteUnraisable(NULL); if (dict == interp->builtins || dict == interp->sysdict) {
return;
} }
#define STORE_MODULE_WEAKREF(name, mod) \ _PyModule_Clear(mod);
if (weaklist != NULL) { \ }
PyObject *wr = PyWeakref_NewRef(mod, NULL); \
if (wr) { \
PyObject *tup = PyTuple_Pack(2, name, wr); \ static void
if (!tup || PyList_Append(weaklist, tup) < 0) { \ finalize_remove_modules(PyThreadState *tstate)
PyErr_WriteUnraisable(NULL); \ {
} \ PyInterpreterState *interp = tstate->interp;
Py_XDECREF(tup); \ int verbose = _PyInterpreterState_GetConfig(interp)->verbose;
Py_DECREF(wr); \
} \
else { \
PyErr_WriteUnraisable(NULL); \
} \
}
#define CLEAR_MODULE(name, mod) \ #define CLEAR_MODULE(name, mod) \
do { \
if (PyModule_Check(mod)) { \ if (PyModule_Check(mod)) { \
if (verbose && PyUnicode_Check(name)) { \ if (verbose && PyUnicode_Check(name)) { \
PySys_FormatStderr("# cleanup[2] removing %U\n", name); \ PySys_FormatStderr("# cleanup[2] removing %U\n", name); \
} \ } \
STORE_MODULE_WEAKREF(name, mod); \ finalize_wipe_module(interp, mod, name, verbose); \
} \
if (PyObject_SetItem(modules, name, Py_None) < 0) { \ if (PyObject_SetItem(modules, name, Py_None) < 0) { \
PyErr_WriteUnraisable(NULL); \ PyErr_WriteUnraisable(NULL); \
} \ } \
} while (0)
PyObject *modules = interp->modules;
// Wipe the __main__ module
PyObject *main_name = PyUnicode_FromString("__main__");
if (main_name != NULL) {
PyObject *main_mod = PyObject_GetItem(modules, main_name);
if (main_mod != NULL) {
CLEAR_MODULE(main_name, main_mod);
Py_DECREF(main_mod);
// Break reference cycles (if any)
_PyGC_CollectNoFail(tstate);
}
else {
PyErr_Clear();
}
Py_DECREF(main_name);
}
else {
PyErr_Clear();
} }
if (PyDict_CheckExact(modules)) { PyObject *reversed = PyObject_CallOneArg((PyObject*)&PyReversed_Type, modules);
Py_ssize_t pos = 0; if (reversed == NULL) {
PyObject *key, *value;
while (PyDict_Next(modules, &pos, &key, &value)) {
CLEAR_MODULE(key, value);
}
}
else {
PyObject *iterator = PyObject_GetIter(modules);
if (iterator == NULL) {
PyErr_WriteUnraisable(NULL); PyErr_WriteUnraisable(NULL);
return;
} }
else { PyObject *keys = PyObject_CallOneArg((PyObject*)&PyList_Type, reversed);
PyObject *key; Py_DECREF(reversed);
while ((key = PyIter_Next(iterator))) { if (keys == NULL) {
PyObject *value = PyObject_GetItem(modules, key); PyErr_WriteUnraisable(NULL);
if (value == NULL) { return;
}
PyObject *iter = PyObject_GetIter(keys);
Py_DECREF(keys);
if (iter == NULL) {
PyErr_WriteUnraisable(NULL);
return;
}
PyObject *name;
while ((name = PyIter_Next(iter))) {
PyObject *mod = PyObject_GetItem(modules, name);
if (mod == NULL) {
PyErr_WriteUnraisable(NULL); PyErr_WriteUnraisable(NULL);
continue; continue;
} }
CLEAR_MODULE(key, value); PyObject *dict = PyModule_Check(mod) ? PyModule_GetDict(mod) : NULL;
Py_DECREF(value); // builtins and sys modules are cleared later
Py_DECREF(key); if (dict != interp->builtins && dict != interp->sysdict) {
CLEAR_MODULE(name, mod);
}
Py_DECREF(mod);
Py_DECREF(name);
} }
if (PyErr_Occurred()) { if (PyErr_Occurred()) {
PyErr_WriteUnraisable(NULL); PyErr_WriteUnraisable(NULL);
} }
Py_DECREF(iterator); Py_DECREF(iter);
}
}
#undef CLEAR_MODULE #undef CLEAR_MODULE
#undef STORE_MODULE_WEAKREF
return weaklist;
} }
@ -1376,46 +1402,27 @@ finalize_restore_builtins(PyThreadState *tstate)
static void static void
finalize_modules_clear_weaklist(PyInterpreterState *interp, finalize_clear_sys_builtins_dict(PyInterpreterState *interp)
PyObject *weaklist, int verbose)
{
// First clear modules imported later
for (Py_ssize_t i = PyList_GET_SIZE(weaklist) - 1; i >= 0; i--) {
PyObject *tup = PyList_GET_ITEM(weaklist, i);
PyObject *name = PyTuple_GET_ITEM(tup, 0);
PyObject *mod = PyWeakref_GET_OBJECT(PyTuple_GET_ITEM(tup, 1));
if (mod == Py_None) {
continue;
}
assert(PyModule_Check(mod));
PyObject *dict = PyModule_GetDict(mod);
if (dict == interp->builtins || dict == interp->sysdict) {
continue;
}
Py_INCREF(mod);
if (verbose && PyUnicode_Check(name)) {
PySys_FormatStderr("# cleanup[3] wiping %U\n", name);
}
_PyModule_Clear(mod);
Py_DECREF(mod);
}
}
static void
finalize_clear_sys_builtins_dict(PyInterpreterState *interp, int verbose)
{ {
// Clear sys dict // Clear sys dict
if (verbose) { PyObject *name = PyUnicode_FromString("sys");
PySys_FormatStderr("# cleanup[3] wiping sys\n"); if (name != NULL) {
_PyModule_ClearDict(name, interp->sysdict);
Py_DECREF(name);
}
else {
PyErr_WriteUnraisable(NULL);
} }
_PyModule_ClearDict(interp->sysdict);
// Clear builtins dict // Clear builtins dict
if (verbose) { name = PyUnicode_FromString("builtins");
PySys_FormatStderr("# cleanup[3] wiping builtins\n"); if (name != NULL) {
_PyModule_ClearDict(name, interp->builtins);
Py_DECREF(name);
}
else {
PyErr_WriteUnraisable(NULL);
} }
_PyModule_ClearDict(interp->builtins);
} }
@ -1424,12 +1431,10 @@ static void
finalize_modules(PyThreadState *tstate) finalize_modules(PyThreadState *tstate)
{ {
PyInterpreterState *interp = tstate->interp; PyInterpreterState *interp = tstate->interp;
PyObject *modules = interp->modules; if (interp->modules == NULL) {
if (modules == NULL) {
// Already done // Already done
return; return;
} }
int verbose = _PyInterpreterState_GetConfig(interp)->verbose;
// Delete some special builtins._ and sys attributes first. These are // Delete some special builtins._ and sys attributes first. These are
// common places where user values hide and people complain when their // common places where user values hide and people complain when their
@ -1438,19 +1443,29 @@ finalize_modules(PyThreadState *tstate)
// destruction order. Sigh. // destruction order. Sigh.
// //
// XXX Perhaps these precautions are obsolete. Who knows? // XXX Perhaps these precautions are obsolete. Who knows?
finalize_modules_delete_special(tstate, verbose); finalize_modules_delete_special(tstate);
// Remove all modules from sys.modules, hoping that garbage collection // Remove all modules from sys.modules, hoping that garbage collection
// can reclaim most of them: set all sys.modules values to None. // can reclaim most of them: set all sys.modules values to None.
// //
// We prepare a list which will receive (name, weakref) tuples of // * First, clear the __main__ module.
// modules when they are removed from sys.modules. The name is used // * Then, clear modules from the most recently imported to the least
// for diagnosis messages (in verbose mode), while the weakref helps // recently imported: reversed(sys.modules).
// detect those modules which have been held alive. // * Exception: leave builtins and sys modules unchanged.
PyObject *weaklist = finalize_remove_modules(modules, verbose); // * Module attributes are set to None from the most recently defined to the
// least recently defined: reversed(module.__dict__).
//
// The special treatment of "builtins" here is because even
// when it's not referenced as a module, its dictionary is
// referenced by almost every module's __builtins__. Since
// deleting a module clears its dictionary (even if there are
// references left to it), we need to delete the "builtins"
// module last. Likewise, we don't delete sys until the very
// end because it is implicitly referenced (e.g. by print).
finalize_remove_modules(tstate);
// Clear the modules dict // Clear the modules dict
finalize_clear_modules_dict(modules); finalize_clear_modules_dict(interp->modules);
// Restore the original builtins dict, to ensure that any // Restore the original builtins dict, to ensure that any
// user data gets cleared. // user data gets cleared.
@ -1463,27 +1478,8 @@ finalize_modules(PyThreadState *tstate)
// machinery. // machinery.
_PyGC_DumpShutdownStats(tstate); _PyGC_DumpShutdownStats(tstate);
if (weaklist != NULL) {
// Now, if there are any modules left alive, clear their globals to
// minimize potential leaks. All C extension modules actually end
// up here, since they are kept alive in the interpreter state.
//
// The special treatment of "builtins" here is because even
// when it's not referenced as a module, its dictionary is
// referenced by almost every module's __builtins__. Since
// deleting a module clears its dictionary (even if there are
// references left to it), we need to delete the "builtins"
// module last. Likewise, we don't delete sys until the very
// end because it is implicitly referenced (e.g. by print).
//
// Since dict is ordered in CPython 3.6+, modules are saved in
// importing order. First clear modules imported later.
finalize_modules_clear_weaklist(interp, weaklist, verbose);
Py_DECREF(weaklist);
}
// Clear sys and builtins modules dict // Clear sys and builtins modules dict
finalize_clear_sys_builtins_dict(interp, verbose); finalize_clear_sys_builtins_dict(interp);
// Clear module dict copies stored in the interpreter state: // Clear module dict copies stored in the interpreter state:
// clear PyInterpreterState.modules_by_index and // clear PyInterpreterState.modules_by_index and