gh-117378: Fix multiprocessing forkserver preload sys.path inheritance. (GH-126538)

gh-117378: Fix multiprocessing forkserver preload sys.path inheritance.

`sys.path` was not properly being sent from the parent process when launching
the multiprocessing forkserver process to preload imports.  This bug has been
there since the forkserver start method was introduced in Python 3.4.  It was
always _supposed_ to inherit `sys.path` the same way the spawn method does.

Observable behavior change: A `''` value in `sys.path` will now be replaced in
the forkserver's `sys.path` with an absolute pathname
`os.path.abspath(os.getcwd())` saved at the time that `multiprocessing` was
imported in the parent process as it already was when using the spawn start
method. **This will only be observable during forkserver preload imports**.

The code invoked before calling things in another process already correctly sets `sys.path`.
Which is likely why this went unnoticed for so long as a mere performance issue in
some configurations.

A workaround for the bug on impacted Pythons is to set PYTHONPATH in the
environment before multiprocessing's forkserver process was started. Not perfect
as that is then inherited by other children, etc, but likely good enough for many
people's purposes.

Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
This commit is contained in:
Gregory P. Smith 2024-11-09 15:01:32 -08:00 committed by GitHub
parent 266328552e
commit 9d08423b6e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 97 additions and 0 deletions

View File

@ -168,6 +168,8 @@ class ForkServer(object):
def main(listener_fd, alive_r, preload, main_path=None, sys_path=None):
'''Run forkserver.'''
if preload:
if sys_path is not None:
sys.path[:] = sys_path
if '__main__' in preload and main_path is not None:
process.current_process()._inheriting = True
try:

View File

@ -12,6 +12,7 @@ import itertools
import sys
import os
import gc
import importlib
import errno
import functools
import signal
@ -20,8 +21,10 @@ import collections.abc
import socket
import random
import logging
import shutil
import subprocess
import struct
import tempfile
import operator
import pickle
import weakref
@ -6397,6 +6400,81 @@ class _TestAtExit(BaseTestCase):
self.assertEqual(f.read(), 'deadbeef')
class _TestSpawnedSysPath(BaseTestCase):
"""Test that sys.path is setup in forkserver and spawn processes."""
ALLOWED_TYPES = ('processes',)
def setUp(self):
self._orig_sys_path = list(sys.path)
self._temp_dir = tempfile.mkdtemp(prefix="test_sys_path-")
self._mod_name = "unique_test_mod"
module_path = os.path.join(self._temp_dir, f"{self._mod_name}.py")
with open(module_path, "w", encoding="utf-8") as mod:
mod.write("# A simple test module\n")
sys.path[:] = [p for p in sys.path if p] # remove any existing ""s
sys.path.insert(0, self._temp_dir)
sys.path.insert(0, "") # Replaced with an abspath in child.
try:
self._ctx_forkserver = multiprocessing.get_context("forkserver")
except ValueError:
self._ctx_forkserver = None
self._ctx_spawn = multiprocessing.get_context("spawn")
def tearDown(self):
sys.path[:] = self._orig_sys_path
shutil.rmtree(self._temp_dir, ignore_errors=True)
@staticmethod
def enq_imported_module_names(queue):
queue.put(tuple(sys.modules))
def test_forkserver_preload_imports_sys_path(self):
ctx = self._ctx_forkserver
if not ctx:
self.skipTest("requires forkserver start method.")
self.assertNotIn(self._mod_name, sys.modules)
multiprocessing.forkserver._forkserver._stop() # Must be fresh.
ctx.set_forkserver_preload(
["test.test_multiprocessing_forkserver", self._mod_name])
q = ctx.Queue()
proc = ctx.Process(target=self.enq_imported_module_names, args=(q,))
proc.start()
proc.join()
child_imported_modules = q.get()
q.close()
self.assertIn(self._mod_name, child_imported_modules)
@staticmethod
def enq_sys_path_and_import(queue, mod_name):
queue.put(sys.path)
try:
importlib.import_module(mod_name)
except ImportError as exc:
queue.put(exc)
else:
queue.put(None)
def test_child_sys_path(self):
for ctx in (self._ctx_spawn, self._ctx_forkserver):
if not ctx:
continue
with self.subTest(f"{ctx.get_start_method()} start method"):
q = ctx.Queue()
proc = ctx.Process(target=self.enq_sys_path_and_import,
args=(q, self._mod_name))
proc.start()
proc.join()
child_sys_path = q.get()
import_error = q.get()
q.close()
self.assertNotIn("", child_sys_path) # replaced by an abspath
self.assertIn(self._temp_dir, child_sys_path) # our addition
# ignore the first element, it is the absolute "" replacement
self.assertEqual(child_sys_path[1:], sys.path[1:])
self.assertIsNone(import_error, msg=f"child could not import {self._mod_name}")
class MiscTestCase(unittest.TestCase):
def test__all__(self):
# Just make sure names in not_exported are excluded

View File

@ -0,0 +1,17 @@
Fixed the :mod:`multiprocessing` ``"forkserver"`` start method forkserver
process to correctly inherit the parent's :data:`sys.path` during the importing
of :func:`multiprocessing.set_forkserver_preload` modules in the same manner as
:data:`sys.path` is configured in workers before executing work items.
This bug caused some forkserver module preloading to silently fail to preload.
This manifested as a performance degration in child processes when the
``sys.path`` was required due to additional repeated work in every worker.
It could also have a side effect of ``""`` remaining in :data:`sys.path` during
forkserver preload imports instead of the absolute path from :func:`os.getcwd`
at multiprocessing import time used in the worker ``sys.path``.
Potentially leading to incorrect imports from the wrong location during
preload. We are unaware of that actually happening. The issue was discovered
by someone observing unexpected preload performance gains.