mirror of https://github.com/python/cpython
GH-106176, GH-104702: Fix reference leak when importing across multiple threads (#108497)
This commit is contained in:
parent
c780698e9b
commit
5f85b443f7
|
@ -51,18 +51,106 @@ def _new_module(name):
|
|||
|
||||
# Module-level locking ########################################################
|
||||
|
||||
# A dict mapping module names to weakrefs of _ModuleLock instances
|
||||
# Dictionary protected by the global import lock
|
||||
# For a list that can have a weakref to it.
|
||||
class _List(list):
|
||||
pass
|
||||
|
||||
|
||||
# Copied from weakref.py with some simplifications and modifications unique to
|
||||
# bootstrapping importlib. Many methods were simply deleting for simplicity, so if they
|
||||
# are needed in the future they may work if simply copied back in.
|
||||
class _WeakValueDictionary:
|
||||
|
||||
def __init__(self):
|
||||
self_weakref = _weakref.ref(self)
|
||||
|
||||
# Inlined to avoid issues with inheriting from _weakref.ref before _weakref is
|
||||
# set by _setup(). Since there's only one instance of this class, this is
|
||||
# not expensive.
|
||||
class KeyedRef(_weakref.ref):
|
||||
|
||||
__slots__ = "key",
|
||||
|
||||
def __new__(type, ob, key):
|
||||
self = super().__new__(type, ob, type.remove)
|
||||
self.key = key
|
||||
return self
|
||||
|
||||
def __init__(self, ob, key):
|
||||
super().__init__(ob, self.remove)
|
||||
|
||||
@staticmethod
|
||||
def remove(wr):
|
||||
nonlocal self_weakref
|
||||
|
||||
self = self_weakref()
|
||||
if self is not None:
|
||||
if self._iterating:
|
||||
self._pending_removals.append(wr.key)
|
||||
else:
|
||||
_weakref._remove_dead_weakref(self.data, wr.key)
|
||||
|
||||
self._KeyedRef = KeyedRef
|
||||
self.clear()
|
||||
|
||||
def clear(self):
|
||||
self._pending_removals = []
|
||||
self._iterating = set()
|
||||
self.data = {}
|
||||
|
||||
def _commit_removals(self):
|
||||
pop = self._pending_removals.pop
|
||||
d = self.data
|
||||
while True:
|
||||
try:
|
||||
key = pop()
|
||||
except IndexError:
|
||||
return
|
||||
_weakref._remove_dead_weakref(d, key)
|
||||
|
||||
def get(self, key, default=None):
|
||||
if self._pending_removals:
|
||||
self._commit_removals()
|
||||
try:
|
||||
wr = self.data[key]
|
||||
except KeyError:
|
||||
return default
|
||||
else:
|
||||
if (o := wr()) is None:
|
||||
return default
|
||||
else:
|
||||
return o
|
||||
|
||||
def setdefault(self, key, default=None):
|
||||
try:
|
||||
o = self.data[key]()
|
||||
except KeyError:
|
||||
o = None
|
||||
if o is None:
|
||||
if self._pending_removals:
|
||||
self._commit_removals()
|
||||
self.data[key] = self._KeyedRef(default, key)
|
||||
return default
|
||||
else:
|
||||
return o
|
||||
|
||||
|
||||
# A dict mapping module names to weakrefs of _ModuleLock instances.
|
||||
# Dictionary protected by the global import lock.
|
||||
_module_locks = {}
|
||||
|
||||
# A dict mapping thread IDs to lists of _ModuleLock instances. This maps a
|
||||
# thread to the module locks it is blocking on acquiring. The values are
|
||||
# lists because a single thread could perform a re-entrant import and be "in
|
||||
# the process" of blocking on locks for more than one module. A thread can
|
||||
# be "in the process" because a thread cannot actually block on acquiring
|
||||
# more than one lock but it can have set up bookkeeping that reflects that
|
||||
# it intends to block on acquiring more than one lock.
|
||||
_blocking_on = {}
|
||||
# A dict mapping thread IDs to weakref'ed lists of _ModuleLock instances.
|
||||
# This maps a thread to the module locks it is blocking on acquiring. The
|
||||
# values are lists because a single thread could perform a re-entrant import
|
||||
# and be "in the process" of blocking on locks for more than one module. A
|
||||
# thread can be "in the process" because a thread cannot actually block on
|
||||
# acquiring more than one lock but it can have set up bookkeeping that reflects
|
||||
# that it intends to block on acquiring more than one lock.
|
||||
#
|
||||
# The dictionary uses a WeakValueDictionary to avoid keeping unnecessary
|
||||
# lists around, regardless of GC runs. This way there's no memory leak if
|
||||
# the list is no longer needed (GH-106176).
|
||||
_blocking_on = None
|
||||
|
||||
|
||||
class _BlockingOnManager:
|
||||
|
@ -79,7 +167,7 @@ class _BlockingOnManager:
|
|||
# re-entrant (i.e., a single thread may take it more than once) so it
|
||||
# wouldn't help us be correct in the face of re-entrancy either.
|
||||
|
||||
self.blocked_on = _blocking_on.setdefault(self.thread_id, [])
|
||||
self.blocked_on = _blocking_on.setdefault(self.thread_id, _List())
|
||||
self.blocked_on.append(self.lock)
|
||||
|
||||
def __exit__(self, *args, **kwargs):
|
||||
|
@ -1409,7 +1497,7 @@ def _setup(sys_module, _imp_module):
|
|||
modules, those two modules must be explicitly passed in.
|
||||
|
||||
"""
|
||||
global _imp, sys
|
||||
global _imp, sys, _blocking_on
|
||||
_imp = _imp_module
|
||||
sys = sys_module
|
||||
|
||||
|
@ -1437,6 +1525,9 @@ def _setup(sys_module, _imp_module):
|
|||
builtin_module = sys.modules[builtin_name]
|
||||
setattr(self_module, builtin_name, builtin_module)
|
||||
|
||||
# Instantiation requires _weakref to have been set.
|
||||
_blocking_on = _WeakValueDictionary()
|
||||
|
||||
|
||||
def _install(sys_module, _imp_module):
|
||||
"""Install importers for builtin and frozen modules"""
|
||||
|
|
|
@ -0,0 +1,4 @@
|
|||
Use a ``WeakValueDictionary`` to track the lists containing the modules each
|
||||
thread is currently importing. This helps avoid a reference leak from
|
||||
keeping the list around longer than necessary. Weakrefs are used as GC can't
|
||||
interrupt the cleanup.
|
Loading…
Reference in New Issue