mirror of https://github.com/python/cpython
gh-117378: Fix multiprocessing forkserver preload sys.path inheritance. (GH-126538)
gh-117378: Fix multiprocessing forkserver preload sys.path inheritance. `sys.path` was not properly being sent from the parent process when launching the multiprocessing forkserver process to preload imports. This bug has been there since the forkserver start method was introduced in Python 3.4. It was always _supposed_ to inherit `sys.path` the same way the spawn method does. Observable behavior change: A `''` value in `sys.path` will now be replaced in the forkserver's `sys.path` with an absolute pathname `os.path.abspath(os.getcwd())` saved at the time that `multiprocessing` was imported in the parent process as it already was when using the spawn start method. **This will only be observable during forkserver preload imports**. The code invoked before calling things in another process already correctly sets `sys.path`. Which is likely why this went unnoticed for so long as a mere performance issue in some configurations. A workaround for the bug on impacted Pythons is to set PYTHONPATH in the environment before multiprocessing's forkserver process was started. Not perfect as that is then inherited by other children, etc, but likely good enough for many people's purposes. Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
This commit is contained in:
parent
266328552e
commit
9d08423b6e
|
@ -168,6 +168,8 @@ class ForkServer(object):
|
|||
def main(listener_fd, alive_r, preload, main_path=None, sys_path=None):
|
||||
'''Run forkserver.'''
|
||||
if preload:
|
||||
if sys_path is not None:
|
||||
sys.path[:] = sys_path
|
||||
if '__main__' in preload and main_path is not None:
|
||||
process.current_process()._inheriting = True
|
||||
try:
|
||||
|
|
|
@ -12,6 +12,7 @@ import itertools
|
|||
import sys
|
||||
import os
|
||||
import gc
|
||||
import importlib
|
||||
import errno
|
||||
import functools
|
||||
import signal
|
||||
|
@ -20,8 +21,10 @@ import collections.abc
|
|||
import socket
|
||||
import random
|
||||
import logging
|
||||
import shutil
|
||||
import subprocess
|
||||
import struct
|
||||
import tempfile
|
||||
import operator
|
||||
import pickle
|
||||
import weakref
|
||||
|
@ -6397,6 +6400,81 @@ class _TestAtExit(BaseTestCase):
|
|||
self.assertEqual(f.read(), 'deadbeef')
|
||||
|
||||
|
||||
class _TestSpawnedSysPath(BaseTestCase):
|
||||
"""Test that sys.path is setup in forkserver and spawn processes."""
|
||||
|
||||
ALLOWED_TYPES = ('processes',)
|
||||
|
||||
def setUp(self):
|
||||
self._orig_sys_path = list(sys.path)
|
||||
self._temp_dir = tempfile.mkdtemp(prefix="test_sys_path-")
|
||||
self._mod_name = "unique_test_mod"
|
||||
module_path = os.path.join(self._temp_dir, f"{self._mod_name}.py")
|
||||
with open(module_path, "w", encoding="utf-8") as mod:
|
||||
mod.write("# A simple test module\n")
|
||||
sys.path[:] = [p for p in sys.path if p] # remove any existing ""s
|
||||
sys.path.insert(0, self._temp_dir)
|
||||
sys.path.insert(0, "") # Replaced with an abspath in child.
|
||||
try:
|
||||
self._ctx_forkserver = multiprocessing.get_context("forkserver")
|
||||
except ValueError:
|
||||
self._ctx_forkserver = None
|
||||
self._ctx_spawn = multiprocessing.get_context("spawn")
|
||||
|
||||
def tearDown(self):
|
||||
sys.path[:] = self._orig_sys_path
|
||||
shutil.rmtree(self._temp_dir, ignore_errors=True)
|
||||
|
||||
@staticmethod
|
||||
def enq_imported_module_names(queue):
|
||||
queue.put(tuple(sys.modules))
|
||||
|
||||
def test_forkserver_preload_imports_sys_path(self):
|
||||
ctx = self._ctx_forkserver
|
||||
if not ctx:
|
||||
self.skipTest("requires forkserver start method.")
|
||||
self.assertNotIn(self._mod_name, sys.modules)
|
||||
multiprocessing.forkserver._forkserver._stop() # Must be fresh.
|
||||
ctx.set_forkserver_preload(
|
||||
["test.test_multiprocessing_forkserver", self._mod_name])
|
||||
q = ctx.Queue()
|
||||
proc = ctx.Process(target=self.enq_imported_module_names, args=(q,))
|
||||
proc.start()
|
||||
proc.join()
|
||||
child_imported_modules = q.get()
|
||||
q.close()
|
||||
self.assertIn(self._mod_name, child_imported_modules)
|
||||
|
||||
@staticmethod
|
||||
def enq_sys_path_and_import(queue, mod_name):
|
||||
queue.put(sys.path)
|
||||
try:
|
||||
importlib.import_module(mod_name)
|
||||
except ImportError as exc:
|
||||
queue.put(exc)
|
||||
else:
|
||||
queue.put(None)
|
||||
|
||||
def test_child_sys_path(self):
|
||||
for ctx in (self._ctx_spawn, self._ctx_forkserver):
|
||||
if not ctx:
|
||||
continue
|
||||
with self.subTest(f"{ctx.get_start_method()} start method"):
|
||||
q = ctx.Queue()
|
||||
proc = ctx.Process(target=self.enq_sys_path_and_import,
|
||||
args=(q, self._mod_name))
|
||||
proc.start()
|
||||
proc.join()
|
||||
child_sys_path = q.get()
|
||||
import_error = q.get()
|
||||
q.close()
|
||||
self.assertNotIn("", child_sys_path) # replaced by an abspath
|
||||
self.assertIn(self._temp_dir, child_sys_path) # our addition
|
||||
# ignore the first element, it is the absolute "" replacement
|
||||
self.assertEqual(child_sys_path[1:], sys.path[1:])
|
||||
self.assertIsNone(import_error, msg=f"child could not import {self._mod_name}")
|
||||
|
||||
|
||||
class MiscTestCase(unittest.TestCase):
|
||||
def test__all__(self):
|
||||
# Just make sure names in not_exported are excluded
|
||||
|
|
|
@ -0,0 +1,17 @@
|
|||
Fixed the :mod:`multiprocessing` ``"forkserver"`` start method forkserver
|
||||
process to correctly inherit the parent's :data:`sys.path` during the importing
|
||||
of :func:`multiprocessing.set_forkserver_preload` modules in the same manner as
|
||||
:data:`sys.path` is configured in workers before executing work items.
|
||||
|
||||
This bug caused some forkserver module preloading to silently fail to preload.
|
||||
This manifested as a performance degration in child processes when the
|
||||
``sys.path`` was required due to additional repeated work in every worker.
|
||||
|
||||
It could also have a side effect of ``""`` remaining in :data:`sys.path` during
|
||||
forkserver preload imports instead of the absolute path from :func:`os.getcwd`
|
||||
at multiprocessing import time used in the worker ``sys.path``.
|
||||
|
||||
Potentially leading to incorrect imports from the wrong location during
|
||||
preload. We are unaware of that actually happening. The issue was discovered
|
||||
by someone observing unexpected preload performance gains.
|
||||
|
Loading…
Reference in New Issue