From b65f2cdfa77d8d12c213aec663ddaaa30d75a4b2 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Thu, 26 Sep 2024 16:57:19 -0700 Subject: [PATCH] gh-84559: Change the multiprocessing start method default to `forkserver` (GH-101556) Change the default multiprocessing start method away from fork to forkserver or spawn on the remaining platforms where it was fork. See the issue for context. This makes the default far more thread safe (other than for people spawning threads at import time... - don't do that!). Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com> Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --- Doc/library/concurrent.futures.rst | 14 +++++----- Doc/library/multiprocessing.rst | 21 ++++++++++----- Doc/whatsnew/3.14.rst | 8 ++++++ Lib/multiprocessing/context.py | 26 +++++++++---------- Lib/test/_test_multiprocessing.py | 24 +++++++++++++---- Lib/test/support/__init__.py | 10 ++++++- ...4-09-19-00-09-48.gh-issue-84559.IrxvQe.rst | 5 ++++ 7 files changed, 75 insertions(+), 33 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-09-19-00-09-48.gh-issue-84559.IrxvQe.rst diff --git a/Doc/library/concurrent.futures.rst b/Doc/library/concurrent.futures.rst index e3b24451188..ce72127127c 100644 --- a/Doc/library/concurrent.futures.rst +++ b/Doc/library/concurrent.futures.rst @@ -286,14 +286,6 @@ to a :class:`ProcessPoolExecutor` will result in deadlock. Added the *initializer* and *initargs* arguments. - .. note:: - The default :mod:`multiprocessing` start method - (see :ref:`multiprocessing-start-methods`) will change away from - *fork* in Python 3.14. Code that requires *fork* be used for their - :class:`ProcessPoolExecutor` should explicitly specify that by - passing a ``mp_context=multiprocessing.get_context("fork")`` - parameter. - .. versionchanged:: 3.11 The *max_tasks_per_child* argument was added to allow users to control the lifetime of workers in the pool. @@ -310,6 +302,12 @@ to a :class:`ProcessPoolExecutor` will result in deadlock. *max_workers* uses :func:`os.process_cpu_count` by default, instead of :func:`os.cpu_count`. + .. versionchanged:: 3.14 + The default process start method (see + :ref:`multiprocessing-start-methods`) changed away from *fork*. If you + require the *fork* start method for :class:`ProcessPoolExecutor` you must + explicitly pass ``mp_context=multiprocessing.get_context("fork")``. + .. _processpoolexecutor-example: ProcessPoolExecutor Example diff --git a/Doc/library/multiprocessing.rst b/Doc/library/multiprocessing.rst index 80d6e4dae24..036b8f44b9f 100644 --- a/Doc/library/multiprocessing.rst +++ b/Doc/library/multiprocessing.rst @@ -124,11 +124,11 @@ to start a process. These *start methods* are inherited by the child process. Note that safely forking a multithreaded process is problematic. - Available on POSIX systems. Currently the default on POSIX except macOS. + Available on POSIX systems. - .. note:: - The default start method will change away from *fork* in Python 3.14. - Code that requires *fork* should explicitly specify that via + .. versionchanged:: 3.14 + This is no longer the default start method on any platform. + Code that requires *fork* must explicitly specify that via :func:`get_context` or :func:`set_start_method`. .. versionchanged:: 3.12 @@ -146,9 +146,11 @@ to start a process. These *start methods* are side-effect so it is generally safe for it to use :func:`os.fork`. No unnecessary resources are inherited. - Available on POSIX platforms which support passing file descriptors - over Unix pipes such as Linux. + Available on POSIX platforms which support passing file descriptors over + Unix pipes such as Linux. The default on those. + .. versionchanged:: 3.14 + This became the default start method on POSIX platforms. .. versionchanged:: 3.4 *spawn* added on all POSIX platforms, and *forkserver* added for @@ -162,6 +164,13 @@ to start a process. These *start methods* are method should be considered unsafe as it can lead to crashes of the subprocess as macOS system libraries may start threads. See :issue:`33725`. +.. versionchanged:: 3.14 + + On POSIX platforms the default start method was changed from *fork* to + *forkserver* to retain the performance but avoid common multithreaded + process incompatibilities. See :gh:`84559`. + + On POSIX using the *spawn* or *forkserver* start methods will also start a *resource tracker* process which tracks the unlinked named system resources (such as named semaphores or diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 5b9b0186004..6875c4c909b 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -385,6 +385,14 @@ Deprecated as a single positional argument. (Contributed by Serhiy Storchaka in :gh:`109218`.) +* :mod:`multiprocessing` and :mod:`concurrent.futures`: + The default start method (see :ref:`multiprocessing-start-methods`) changed + away from *fork* to *forkserver* on platforms where it was not already + *spawn* (Windows & macOS). If you require the threading incompatible *fork* + start method you must explicitly specify it when using :mod:`multiprocessing` + or :mod:`concurrent.futures` APIs. + (Contributed by Gregory P. Smith in :gh:`84559`.) + * :mod:`os`: :term:`Soft deprecate ` :func:`os.popen` and :func:`os.spawn* ` functions. They should no longer be used to diff --git a/Lib/multiprocessing/context.py b/Lib/multiprocessing/context.py index ddcc7e79009..d0a3ad00e53 100644 --- a/Lib/multiprocessing/context.py +++ b/Lib/multiprocessing/context.py @@ -259,13 +259,12 @@ class DefaultContext(BaseContext): def get_all_start_methods(self): """Returns a list of the supported start methods, default first.""" - if sys.platform == 'win32': - return ['spawn'] - else: - methods = ['spawn', 'fork'] if sys.platform == 'darwin' else ['fork', 'spawn'] - if reduction.HAVE_SEND_HANDLE: - methods.append('forkserver') - return methods + default = self._default_context.get_start_method() + start_method_names = [default] + start_method_names.extend( + name for name in _concrete_contexts if name != default + ) + return start_method_names # @@ -320,14 +319,15 @@ if sys.platform != 'win32': 'spawn': SpawnContext(), 'forkserver': ForkServerContext(), } - if sys.platform == 'darwin': - # bpo-33725: running arbitrary code after fork() is no longer reliable - # on macOS since macOS 10.14 (Mojave). Use spawn by default instead. - _default_context = DefaultContext(_concrete_contexts['spawn']) + # bpo-33725: running arbitrary code after fork() is no longer reliable + # on macOS since macOS 10.14 (Mojave). Use spawn by default instead. + # gh-84559: We changed everyones default to a thread safeish one in 3.14. + if reduction.HAVE_SEND_HANDLE and sys.platform != 'darwin': + _default_context = DefaultContext(_concrete_contexts['forkserver']) else: - _default_context = DefaultContext(_concrete_contexts['fork']) + _default_context = DefaultContext(_concrete_contexts['spawn']) -else: +else: # Windows class SpawnProcess(process.BaseProcess): _start_method = 'spawn' diff --git a/Lib/test/_test_multiprocessing.py b/Lib/test/_test_multiprocessing.py index 4b3a0645cfc..a059a6b8340 100644 --- a/Lib/test/_test_multiprocessing.py +++ b/Lib/test/_test_multiprocessing.py @@ -5553,15 +5553,29 @@ class TestStartMethod(unittest.TestCase): multiprocessing.set_start_method(old_method, force=True) self.assertGreaterEqual(count, 1) - def test_get_all(self): + def test_get_all_start_methods(self): methods = multiprocessing.get_all_start_methods() + self.assertIn('spawn', methods) if sys.platform == 'win32': self.assertEqual(methods, ['spawn']) + elif sys.platform == 'darwin': + self.assertEqual(methods[0], 'spawn') # The default is first. + # Whether these work or not, they remain available on macOS. + self.assertIn('fork', methods) + self.assertIn('forkserver', methods) else: - self.assertTrue(methods == ['fork', 'spawn'] or - methods == ['spawn', 'fork'] or - methods == ['fork', 'spawn', 'forkserver'] or - methods == ['spawn', 'fork', 'forkserver']) + # POSIX + self.assertIn('fork', methods) + if other_methods := set(methods) - {'fork', 'spawn'}: + # If there are more than those two, forkserver must be one. + self.assertEqual({'forkserver'}, other_methods) + # The default is the first method in the list. + self.assertIn(methods[0], {'forkserver', 'spawn'}, + msg='3.14+ default must not be fork') + if methods[0] == 'spawn': + # Confirm that the current default selection logic prefers + # forkserver vs spawn when available. + self.assertNotIn('forkserver', methods) def test_preload_resources(self): if multiprocessing.get_start_method() != 'forkserver': diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index 628529b8664..99cb10fc7b5 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -2209,7 +2209,15 @@ def skip_if_broken_multiprocessing_synchronize(): # bpo-38377: On Linux, creating a semaphore fails with OSError # if the current user does not have the permission to create # a file in /dev/shm/ directory. - synchronize.Lock(ctx=None) + import multiprocessing + synchronize.Lock(ctx=multiprocessing.get_context('fork')) + # The explicit fork mp context is required in order for + # TestResourceTracker.test_resource_tracker_reused to work. + # synchronize creates a new multiprocessing.resource_tracker + # process at module import time via the above call in that + # scenario. Awkward. This enables gh-84559. No code involved + # should have threads at that point so fork() should be safe. + except OSError as exc: raise unittest.SkipTest(f"broken multiprocessing SemLock: {exc!r}") diff --git a/Misc/NEWS.d/next/Library/2024-09-19-00-09-48.gh-issue-84559.IrxvQe.rst b/Misc/NEWS.d/next/Library/2024-09-19-00-09-48.gh-issue-84559.IrxvQe.rst new file mode 100644 index 00000000000..a4428e20f3c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-09-19-00-09-48.gh-issue-84559.IrxvQe.rst @@ -0,0 +1,5 @@ +The default :mod:`multiprocessing` start method on Linux and other POSIX +systems has been changed away from often unsafe ``"fork"`` to ``"forkserver"`` +(when the platform supports sending file handles over pipes as most do) or +``"spawn"``. Mac and Windows are unchanged as they already default to +``"spawn"``.