Issue #9260: A finer-grained import lock.

Most of the import sequence now uses per-module locks rather than the
global import lock, eliminating well-known issues with threads and imports.
This commit is contained in:
Antoine Pitrou 2012-05-17 18:55:59 +02:00
parent 5cec9d2ae5
commit ea3eb88bca
12 changed files with 3777 additions and 3041 deletions

View File

@ -30,13 +30,13 @@ Importing Modules
.. c:function:: PyObject* PyImport_ImportModuleNoBlock(const char *name) .. c:function:: PyObject* PyImport_ImportModuleNoBlock(const char *name)
This version of :c:func:`PyImport_ImportModule` does not block. It's intended This function is a deprecated alias of :c:func:`PyImport_ImportModule`.
to be used in C functions that import other modules to execute a function.
The import may block if another thread holds the import lock. The function .. versionchanged:: 3.3
:c:func:`PyImport_ImportModuleNoBlock` never blocks. It first tries to fetch This function used to fail immediately when the import lock was held
the module from sys.modules and falls back to :c:func:`PyImport_ImportModule` by another thread. In Python 3.3 though, the locking scheme switched
unless the lock is held, in which case the function will raise an to per-module locks for most purposes, so this function's special
:exc:`ImportError`. behaviour isn't needed anymore.
.. c:function:: PyObject* PyImport_ImportModuleEx(char *name, PyObject *globals, PyObject *locals, PyObject *fromlist) .. c:function:: PyObject* PyImport_ImportModuleEx(char *name, PyObject *globals, PyObject *locals, PyObject *fromlist)

View File

@ -112,18 +112,29 @@ This module provides an interface to the mechanisms used to implement the
Return ``True`` if the import lock is currently held, else ``False``. On Return ``True`` if the import lock is currently held, else ``False``. On
platforms without threads, always return ``False``. platforms without threads, always return ``False``.
On platforms with threads, a thread executing an import holds an internal lock On platforms with threads, a thread executing an import first holds a
until the import is complete. This lock blocks other threads from doing an global import lock, then sets up a per-module lock for the rest of the
import until the original import completes, which in turn prevents other threads import. This blocks other threads from importing the same module until
from seeing incomplete module objects constructed by the original thread while the original import completes, preventing other threads from seeing
in the process of completing its import (and the imports, if any, triggered by incomplete module objects constructed by the original thread. An
that). exception is made for circular imports, which by construction have to
expose an incomplete module object at some point.
.. note::
Locking semantics of imports are an implementation detail which may
vary from release to release. However, Python ensures that circular
imports work without any deadlocks.
.. versionchanged:: 3.3
In Python 3.3, the locking scheme has changed to per-module locks for
the most part.
.. function:: acquire_lock() .. function:: acquire_lock()
Acquire the interpreter's import lock for the current thread. This lock should Acquire the interpreter's global import lock for the current thread.
be used by import hooks to ensure thread-safety when importing modules. This lock should be used by import hooks to ensure thread-safety when
importing modules.
Once a thread has acquired the import lock, the same thread may acquire it Once a thread has acquired the import lock, the same thread may acquire it
again without blocking; the thread must release it once for each time it has again without blocking; the thread must release it once for each time it has
@ -134,8 +145,8 @@ This module provides an interface to the mechanisms used to implement the
.. function:: release_lock() .. function:: release_lock()
Release the interpreter's import lock. On platforms without threads, this Release the interpreter's global import lock. On platforms without
function does nothing. threads, this function does nothing.
.. function:: reload(module) .. function:: reload(module)

View File

@ -159,6 +159,145 @@ def new_module(name):
return type(_io)(name) return type(_io)(name)
# Module-level locking ########################################################
# A dict mapping module names to weakrefs of _ModuleLock instances
_module_locks = {}
# A dict mapping thread ids to _ModuleLock instances
_blocking_on = {}
class _DeadlockError(RuntimeError):
pass
class _ModuleLock:
"""A recursive lock implementation which is able to detect deadlocks
(e.g. thread 1 trying to take locks A then B, and thread 2 trying to
take locks B then A).
"""
def __init__(self, name):
self.lock = _thread.allocate_lock()
self.wakeup = _thread.allocate_lock()
self.name = name
self.owner = None
self.count = 0
self.waiters = 0
def has_deadlock(self):
# Deadlock avoidance for concurrent circular imports.
me = _thread.get_ident()
tid = self.owner
while True:
lock = _blocking_on.get(tid)
if lock is None:
return False
tid = lock.owner
if tid == me:
return True
def acquire(self):
"""
Acquire the module lock. If a potential deadlock is detected,
a _DeadlockError is raised.
Otherwise, the lock is always acquired and True is returned.
"""
tid = _thread.get_ident()
_blocking_on[tid] = self
try:
while True:
with self.lock:
if self.count == 0 or self.owner == tid:
self.owner = tid
self.count += 1
return True
if self.has_deadlock():
raise _DeadlockError("deadlock detected by %r" % self)
if self.wakeup.acquire(False):
self.waiters += 1
# Wait for a release() call
self.wakeup.acquire()
self.wakeup.release()
finally:
del _blocking_on[tid]
def release(self):
tid = _thread.get_ident()
with self.lock:
if self.owner != tid:
raise RuntimeError("cannot release un-acquired lock")
assert self.count > 0
self.count -= 1
if self.count == 0:
self.owner = None
if self.waiters:
self.waiters -= 1
self.wakeup.release()
def __repr__(self):
return "_ModuleLock(%r) at %d" % (self.name, id(self))
class _DummyModuleLock:
"""A simple _ModuleLock equivalent for Python builds without
multi-threading support."""
def __init__(self, name):
self.name = name
self.count = 0
def acquire(self):
self.count += 1
return True
def release(self):
if self.count == 0:
raise RuntimeError("cannot release un-acquired lock")
self.count -= 1
def __repr__(self):
return "_DummyModuleLock(%r) at %d" % (self.name, id(self))
# The following two functions are for consumption by Python/import.c.
def _get_module_lock(name):
"""Get or create the module lock for a given module name.
Should only be called with the import lock taken."""
lock = None
if name in _module_locks:
lock = _module_locks[name]()
if lock is None:
if _thread is None:
lock = _DummyModuleLock(name)
else:
lock = _ModuleLock(name)
def cb(_):
del _module_locks[name]
_module_locks[name] = _weakref.ref(lock, cb)
return lock
def _lock_unlock_module(name):
"""Release the global import lock, and acquires then release the
module lock for a given module name.
This is used to ensure a module is completely initialized, in the
event it is being imported by another thread.
Should only be called with the import lock taken."""
lock = _get_module_lock(name)
_imp.release_lock()
try:
lock.acquire()
except _DeadlockError:
# Concurrent circular import, we'll accept a partially initialized
# module object.
pass
else:
lock.release()
# Finder/loader utility code ################################################## # Finder/loader utility code ##################################################
_PYCACHE = '__pycache__' _PYCACHE = '__pycache__'
@ -264,12 +403,15 @@ def module_for_loader(fxn):
else: else:
module.__package__ = fullname.rpartition('.')[0] module.__package__ = fullname.rpartition('.')[0]
try: try:
module.__initializing__ = True
# If __package__ was not set above, __import__() will do it later. # If __package__ was not set above, __import__() will do it later.
return fxn(self, module, *args, **kwargs) return fxn(self, module, *args, **kwargs)
except: except:
if not is_reload: if not is_reload:
del sys.modules[fullname] del sys.modules[fullname]
raise raise
finally:
module.__initializing__ = False
_wrap(module_for_loader_wrapper, fxn) _wrap(module_for_loader_wrapper, fxn)
return module_for_loader_wrapper return module_for_loader_wrapper
@ -932,6 +1074,7 @@ def _find_module(name, path):
if not sys.meta_path: if not sys.meta_path:
_warnings.warn('sys.meta_path is empty', ImportWarning) _warnings.warn('sys.meta_path is empty', ImportWarning)
for finder in sys.meta_path: for finder in sys.meta_path:
with _ImportLockContext():
loader = finder.find_module(name, path) loader = finder.find_module(name, path)
if loader is not None: if loader is not None:
# The parent import may have already imported this module. # The parent import may have already imported this module.
@ -962,8 +1105,7 @@ def _sanity_check(name, package, level):
_ERR_MSG = 'No module named {!r}' _ERR_MSG = 'No module named {!r}'
def _find_and_load(name, import_): def _find_and_load_unlocked(name, import_):
"""Find and load the module."""
path = None path = None
parent = name.rpartition('.')[0] parent = name.rpartition('.')[0]
if parent: if parent:
@ -1009,6 +1151,19 @@ def _find_and_load(name, import_):
return module return module
def _find_and_load(name, import_):
"""Find and load the module, and release the import lock."""
try:
lock = _get_module_lock(name)
finally:
_imp.release_lock()
lock.acquire()
try:
return _find_and_load_unlocked(name, import_)
finally:
lock.release()
def _gcd_import(name, package=None, level=0): def _gcd_import(name, package=None, level=0):
"""Import and return the module based on its name, the package the call is """Import and return the module based on its name, the package the call is
being made from, and the level adjustment. being made from, and the level adjustment.
@ -1021,17 +1176,17 @@ def _gcd_import(name, package=None, level=0):
_sanity_check(name, package, level) _sanity_check(name, package, level)
if level > 0: if level > 0:
name = _resolve_name(name, package, level) name = _resolve_name(name, package, level)
with _ImportLockContext(): _imp.acquire_lock()
try: if name not in sys.modules:
return _find_and_load(name, _gcd_import)
module = sys.modules[name] module = sys.modules[name]
if module is None: if module is None:
_imp.release_lock()
message = ("import of {} halted; " message = ("import of {} halted; "
"None in sys.modules".format(name)) "None in sys.modules".format(name))
raise ImportError(message, name=name) raise ImportError(message, name=name)
_lock_unlock_module(name)
return module return module
except KeyError:
pass # Don't want to chain the exception
return _find_and_load(name, _gcd_import)
def _handle_fromlist(module, fromlist, import_): def _handle_fromlist(module, fromlist, import_):
@ -1149,7 +1304,17 @@ def _setup(sys_module, _imp_module):
continue continue
else: else:
raise ImportError('importlib requires posix or nt') raise ImportError('importlib requires posix or nt')
try:
thread_module = BuiltinImporter.load_module('_thread')
except ImportError:
# Python was built without threads
thread_module = None
weakref_module = BuiltinImporter.load_module('_weakref')
setattr(self_module, '_os', os_module) setattr(self_module, '_os', os_module)
setattr(self_module, '_thread', thread_module)
setattr(self_module, '_weakref', weakref_module)
setattr(self_module, 'path_sep', path_sep) setattr(self_module, 'path_sep', path_sep)
setattr(self_module, 'path_separators', set(path_separators)) setattr(self_module, 'path_separators', set(path_separators))
# Constants # Constants

View File

@ -0,0 +1,115 @@
from importlib import _bootstrap
import time
import unittest
import weakref
from test import support
try:
import threading
except ImportError:
threading = None
else:
from test import lock_tests
LockType = _bootstrap._ModuleLock
DeadlockError = _bootstrap._DeadlockError
if threading is not None:
class ModuleLockAsRLockTests(lock_tests.RLockTests):
locktype = staticmethod(lambda: LockType("some_lock"))
# _is_owned() unsupported
test__is_owned = None
# acquire(blocking=False) unsupported
test_try_acquire = None
test_try_acquire_contended = None
# `with` unsupported
test_with = None
# acquire(timeout=...) unsupported
test_timeout = None
# _release_save() unsupported
test_release_save_unacquired = None
else:
class ModuleLockAsRLockTests(unittest.TestCase):
pass
@unittest.skipUnless(threading, "threads needed for this test")
class DeadlockAvoidanceTests(unittest.TestCase):
def run_deadlock_avoidance_test(self, create_deadlock):
NLOCKS = 10
locks = [LockType(str(i)) for i in range(NLOCKS)]
pairs = [(locks[i], locks[(i+1)%NLOCKS]) for i in range(NLOCKS)]
if create_deadlock:
NTHREADS = NLOCKS
else:
NTHREADS = NLOCKS - 1
barrier = threading.Barrier(NTHREADS)
results = []
def _acquire(lock):
"""Try to acquire the lock. Return True on success, False on deadlock."""
try:
lock.acquire()
except DeadlockError:
return False
else:
return True
def f():
a, b = pairs.pop()
ra = _acquire(a)
barrier.wait()
rb = _acquire(b)
results.append((ra, rb))
if rb:
b.release()
if ra:
a.release()
lock_tests.Bunch(f, NTHREADS).wait_for_finished()
self.assertEqual(len(results), NTHREADS)
return results
def test_deadlock(self):
results = self.run_deadlock_avoidance_test(True)
# One of the threads detected a potential deadlock on its second
# acquire() call.
self.assertEqual(results.count((True, False)), 1)
self.assertEqual(results.count((True, True)), len(results) - 1)
def test_no_deadlock(self):
results = self.run_deadlock_avoidance_test(False)
self.assertEqual(results.count((True, False)), 0)
self.assertEqual(results.count((True, True)), len(results))
class LifetimeTests(unittest.TestCase):
def test_lock_lifetime(self):
name = "xyzzy"
self.assertNotIn(name, _bootstrap._module_locks)
lock = _bootstrap._get_module_lock(name)
self.assertIn(name, _bootstrap._module_locks)
wr = weakref.ref(lock)
del lock
support.gc_collect()
self.assertNotIn(name, _bootstrap._module_locks)
self.assertIs(wr(), None)
def test_all_locks(self):
support.gc_collect()
self.assertEqual(0, len(_bootstrap._module_locks))
@support.reap_threads
def test_main():
support.run_unittest(ModuleLockAsRLockTests,
DeadlockAvoidanceTests,
LifetimeTests)
if __name__ == '__main__':
test_main()

View File

@ -167,7 +167,7 @@ def visiblename(name, all=None, obj=None):
if name in {'__builtins__', '__doc__', '__file__', '__path__', if name in {'__builtins__', '__doc__', '__file__', '__path__',
'__module__', '__name__', '__slots__', '__package__', '__module__', '__name__', '__slots__', '__package__',
'__cached__', '__author__', '__credits__', '__date__', '__cached__', '__author__', '__credits__', '__date__',
'__version__', '__qualname__'}: '__version__', '__qualname__', '__initializing__'}:
return 0 return 0
# Private names are hidden, but special names are displayed. # Private names are hidden, but special names are displayed.
if name.startswith('__') and name.endswith('__'): return 1 if name.startswith('__') and name.endswith('__'): return 1

View File

@ -247,7 +247,6 @@ class RLockTests(BaseLockTests):
# Cannot release an unacquired lock # Cannot release an unacquired lock
lock = self.locktype() lock = self.locktype()
self.assertRaises(RuntimeError, lock.release) self.assertRaises(RuntimeError, lock.release)
self.assertRaises(RuntimeError, lock._release_save)
lock.acquire() lock.acquire()
lock.acquire() lock.acquire()
lock.release() lock.release()
@ -255,6 +254,17 @@ class RLockTests(BaseLockTests):
lock.release() lock.release()
lock.release() lock.release()
self.assertRaises(RuntimeError, lock.release) self.assertRaises(RuntimeError, lock.release)
def test_release_save_unacquired(self):
# Cannot _release_save an unacquired lock
lock = self.locktype()
self.assertRaises(RuntimeError, lock._release_save)
lock.acquire()
lock.acquire()
lock.release()
lock.acquire()
lock.release()
lock.release()
self.assertRaises(RuntimeError, lock._release_save) self.assertRaises(RuntimeError, lock._release_save)
def test_different_thread(self): def test_different_thread(self):

View File

@ -23,6 +23,8 @@ def cleanout(root):
def fixdir(lst): def fixdir(lst):
if "__builtins__" in lst: if "__builtins__" in lst:
lst.remove("__builtins__") lst.remove("__builtins__")
if "__initializing__" in lst:
lst.remove("__initializing__")
return lst return lst

View File

@ -12,7 +12,7 @@ import time
import shutil import shutil
import unittest import unittest
from test.support import ( from test.support import (
verbose, import_module, run_unittest, TESTFN, reap_threads) verbose, import_module, run_unittest, TESTFN, reap_threads, forget)
threading = import_module('threading') threading = import_module('threading')
def task(N, done, done_tasks, errors): def task(N, done, done_tasks, errors):
@ -187,7 +187,7 @@ class ThreadedImportTests(unittest.TestCase):
contents = contents % {'delay': delay} contents = contents % {'delay': delay}
with open(os.path.join(TESTFN, name + ".py"), "wb") as f: with open(os.path.join(TESTFN, name + ".py"), "wb") as f:
f.write(contents.encode('utf-8')) f.write(contents.encode('utf-8'))
self.addCleanup(sys.modules.pop, name, None) self.addCleanup(forget, name)
results = [] results = []
def import_ab(): def import_ab():
@ -204,6 +204,21 @@ class ThreadedImportTests(unittest.TestCase):
t2.join() t2.join()
self.assertEqual(set(results), {'a', 'b'}) self.assertEqual(set(results), {'a', 'b'})
def test_side_effect_import(self):
code = """if 1:
import threading
def target():
import random
t = threading.Thread(target=target)
t.start()
t.join()"""
sys.path.insert(0, os.curdir)
self.addCleanup(sys.path.remove, os.curdir)
with open(TESTFN + ".py", "wb") as f:
f.write(code.encode('utf-8'))
self.addCleanup(forget, TESTFN)
__import__(TESTFN)
@reap_threads @reap_threads
def test_main(): def test_main():

View File

@ -70,7 +70,7 @@ NT_OFFSET = 256
tok_name = {value: name tok_name = {value: name
for name, value in globals().items() for name, value in globals().items()
if isinstance(value, int)} if isinstance(value, int) and not name.startswith('_')}
__all__.extend(tok_name.values()) __all__.extend(tok_name.values())
def ISTERMINAL(x): def ISTERMINAL(x):

View File

@ -10,6 +10,10 @@ What's New in Python 3.3.0 Alpha 4?
Core and Builtins Core and Builtins
----------------- -----------------
- Issue #9260: A finer-grained import lock. Most of the import sequence
now uses per-module locks rather than the global import lock, eliminating
well-known issues with threads and imports.
- Issue #14624: UTF-16 decoding is now 3x to 4x faster on various inputs. - Issue #14624: UTF-16 decoding is now 3x to 4x faster on various inputs.
Patch by Serhiy Storchaka. Patch by Serhiy Storchaka.

View File

@ -1370,47 +1370,7 @@ PyImport_ImportModule(const char *name)
PyObject * PyObject *
PyImport_ImportModuleNoBlock(const char *name) PyImport_ImportModuleNoBlock(const char *name)
{ {
PyObject *nameobj, *modules, *result; return PyImport_ImportModule(name);
#ifdef WITH_THREAD
long me;
#endif
/* Try to get the module from sys.modules[name] */
modules = PyImport_GetModuleDict();
if (modules == NULL)
return NULL;
nameobj = PyUnicode_FromString(name);
if (nameobj == NULL)
return NULL;
result = PyDict_GetItem(modules, nameobj);
if (result != NULL) {
Py_DECREF(nameobj);
Py_INCREF(result);
return result;
}
PyErr_Clear();
#ifdef WITH_THREAD
/* check the import lock
* me might be -1 but I ignore the error here, the lock function
* takes care of the problem */
me = PyThread_get_thread_ident();
if (import_lock_thread == -1 || import_lock_thread == me) {
/* no thread or me is holding the lock */
result = PyImport_Import(nameobj);
}
else {
PyErr_Format(PyExc_ImportError,
"Failed to import %R because the import lock"
"is held by another thread.",
nameobj);
result = NULL;
}
#else
result = PyImport_Import(nameobj);
#endif
Py_DECREF(nameobj);
return result;
} }
@ -1420,11 +1380,13 @@ PyImport_ImportModuleLevelObject(PyObject *name, PyObject *given_globals,
int level) int level)
{ {
_Py_IDENTIFIER(__import__); _Py_IDENTIFIER(__import__);
_Py_IDENTIFIER(__initializing__);
_Py_IDENTIFIER(__package__); _Py_IDENTIFIER(__package__);
_Py_IDENTIFIER(__path__); _Py_IDENTIFIER(__path__);
_Py_IDENTIFIER(__name__); _Py_IDENTIFIER(__name__);
_Py_IDENTIFIER(_find_and_load); _Py_IDENTIFIER(_find_and_load);
_Py_IDENTIFIER(_handle_fromlist); _Py_IDENTIFIER(_handle_fromlist);
_Py_IDENTIFIER(_lock_unlock_module);
_Py_static_string(single_dot, "."); _Py_static_string(single_dot, ".");
PyObject *abs_name = NULL; PyObject *abs_name = NULL;
PyObject *builtins_import = NULL; PyObject *builtins_import = NULL;
@ -1607,16 +1569,48 @@ PyImport_ImportModuleLevelObject(PyObject *name, PyObject *given_globals,
goto error_with_unlock; goto error_with_unlock;
} }
else if (mod != NULL) { else if (mod != NULL) {
PyObject *value;
int initializing = 0;
Py_INCREF(mod); Py_INCREF(mod);
/* Only call _bootstrap._lock_unlock_module() if __initializing__ is true. */
value = _PyObject_GetAttrId(mod, &PyId___initializing__);
if (value == NULL)
PyErr_Clear();
else {
initializing = PyObject_IsTrue(value);
Py_DECREF(value);
if (initializing == -1)
PyErr_Clear();
}
if (initializing > 0) {
/* _bootstrap._lock_unlock_module() releases the import lock */
value = _PyObject_CallMethodObjIdArgs(interp->importlib,
&PyId__lock_unlock_module, abs_name,
NULL);
if (value == NULL)
goto error;
Py_DECREF(value);
} }
else { else {
#ifdef WITH_THREAD
if (_PyImport_ReleaseLock() < 0) {
PyErr_SetString(PyExc_RuntimeError, "not holding the import lock");
goto error;
}
#endif
}
}
else {
/* _bootstrap._find_and_load() releases the import lock */
mod = _PyObject_CallMethodObjIdArgs(interp->importlib, mod = _PyObject_CallMethodObjIdArgs(interp->importlib,
&PyId__find_and_load, abs_name, &PyId__find_and_load, abs_name,
builtins_import, NULL); builtins_import, NULL);
if (mod == NULL) { if (mod == NULL) {
goto error_with_unlock; goto error;
} }
} }
/* From now on we don't hold the import lock anymore. */
if (PyObject_Not(fromlist)) { if (PyObject_Not(fromlist)) {
if (level == 0 || PyUnicode_GET_LENGTH(name) > 0) { if (level == 0 || PyUnicode_GET_LENGTH(name) > 0) {
@ -1625,12 +1619,12 @@ PyImport_ImportModuleLevelObject(PyObject *name, PyObject *given_globals,
PyObject *borrowed_dot = _PyUnicode_FromId(&single_dot); PyObject *borrowed_dot = _PyUnicode_FromId(&single_dot);
if (borrowed_dot == NULL) { if (borrowed_dot == NULL) {
goto error_with_unlock; goto error;
} }
partition = PyUnicode_Partition(name, borrowed_dot); partition = PyUnicode_Partition(name, borrowed_dot);
if (partition == NULL) { if (partition == NULL) {
goto error_with_unlock; goto error;
} }
if (PyUnicode_GET_LENGTH(PyTuple_GET_ITEM(partition, 1)) == 0) { if (PyUnicode_GET_LENGTH(PyTuple_GET_ITEM(partition, 1)) == 0) {
@ -1638,7 +1632,7 @@ PyImport_ImportModuleLevelObject(PyObject *name, PyObject *given_globals,
Py_DECREF(partition); Py_DECREF(partition);
final_mod = mod; final_mod = mod;
Py_INCREF(mod); Py_INCREF(mod);
goto exit_with_unlock; goto error;
} }
front = PyTuple_GET_ITEM(partition, 0); front = PyTuple_GET_ITEM(partition, 0);
@ -1657,7 +1651,7 @@ PyImport_ImportModuleLevelObject(PyObject *name, PyObject *given_globals,
abs_name_len - cut_off); abs_name_len - cut_off);
Py_DECREF(front); Py_DECREF(front);
if (to_return == NULL) { if (to_return == NULL) {
goto error_with_unlock; goto error;
} }
final_mod = PyDict_GetItem(interp->modules, to_return); final_mod = PyDict_GetItem(interp->modules, to_return);
@ -1683,8 +1677,8 @@ PyImport_ImportModuleLevelObject(PyObject *name, PyObject *given_globals,
fromlist, builtins_import, fromlist, builtins_import,
NULL); NULL);
} }
goto error;
exit_with_unlock:
error_with_unlock: error_with_unlock:
#ifdef WITH_THREAD #ifdef WITH_THREAD
if (_PyImport_ReleaseLock() < 0) { if (_PyImport_ReleaseLock() < 0) {

File diff suppressed because it is too large Load Diff