From bb0cf8fd60e71581a90179af63e60e8704c3814b Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 27 Feb 2023 09:21:18 -0700 Subject: [PATCH] gh-102251: Updates to test_imp Toward Fixing Some Refleaks (gh-102254) This is related to fixing the refleaks introduced by commit 096d009. I haven't been able to find the leak yet, but these changes are a consequence of that effort. This includes some cleanup, some tweaks to the existing tests, and a bunch of new test cases. The only change here that might have impact outside the tests in question is in imp.py, where I update imp.load_dynamic() to use spec_from_file_location() instead of creating a ModuleSpec directly. Also note that I've updated the tests to only skip if we're checking for refleaks (regrtest's --huntrleaks), whereas in gh-101969 I had skipped the tests entirely. The tests will be useful for some upcoming work and I'd rather the refleaks not hold that up. (It isn't clear how quickly we'll be able to fix the leaking code, though it will certainly be done in the short term.) https://github.com/python/cpython/issues/102251 --- Lib/imp.py | 4 +- Lib/test/test_imp.py | 1110 +++++++++++++++++++++++++++--------- Modules/_testsinglephase.c | 22 +- Python/import.c | 115 +++- 4 files changed, 952 insertions(+), 299 deletions(-) diff --git a/Lib/imp.py b/Lib/imp.py index fc42c157658..fe850f6a001 100644 --- a/Lib/imp.py +++ b/Lib/imp.py @@ -338,8 +338,8 @@ if create_dynamic: # Issue #24748: Skip the sys.modules check in _load_module_shim; # always load new extension - spec = importlib.machinery.ModuleSpec( - name=name, loader=loader, origin=path) + spec = importlib.util.spec_from_file_location( + name, path, loader=loader) return _load(spec) else: diff --git a/Lib/test/test_imp.py b/Lib/test/test_imp.py index 2292bb20939..03e3adba221 100644 --- a/Lib/test/test_imp.py +++ b/Lib/test/test_imp.py @@ -1,4 +1,5 @@ import gc +import json import importlib import importlib.util import os @@ -11,6 +12,7 @@ from test.support import os_helper from test.support import script_helper from test.support import warnings_helper import textwrap +import types import unittest import warnings imp = warnings_helper.import_deprecated('imp') @@ -39,6 +41,169 @@ def requires_load_dynamic(meth): 'imp.load_dynamic() required')(meth) +class ModuleSnapshot(types.SimpleNamespace): + """A representation of a module for testing. + + Fields: + + * id - the module's object ID + * module - the actual module or an adequate substitute + * __file__ + * __spec__ + * name + * origin + * ns - a copy (dict) of the module's __dict__ (or None) + * ns_id - the object ID of the module's __dict__ + * cached - the sys.modules[mod.__spec__.name] entry (or None) + * cached_id - the object ID of the sys.modules entry (or None) + + In cases where the value is not available (e.g. due to serialization), + the value will be None. + """ + _fields = tuple('id module ns ns_id cached cached_id'.split()) + + @classmethod + def from_module(cls, mod): + name = mod.__spec__.name + cached = sys.modules.get(name) + return cls( + id=id(mod), + module=mod, + ns=types.SimpleNamespace(**mod.__dict__), + ns_id=id(mod.__dict__), + cached=cached, + cached_id=id(cached), + ) + + SCRIPT = textwrap.dedent(''' + {imports} + + name = {name!r} + + {prescript} + + mod = {name} + + {body} + + {postscript} + ''') + IMPORTS = textwrap.dedent(''' + import sys + ''').strip() + SCRIPT_BODY = textwrap.dedent(''' + # Capture the snapshot data. + cached = sys.modules.get(name) + snapshot = dict( + id=id(mod), + module=dict( + __file__=mod.__file__, + __spec__=dict( + name=mod.__spec__.name, + origin=mod.__spec__.origin, + ), + ), + ns=None, + ns_id=id(mod.__dict__), + cached=None, + cached_id=id(cached) if cached else None, + ) + ''').strip() + CLEANUP_SCRIPT = textwrap.dedent(''' + # Clean up the module. + sys.modules.pop(name, None) + ''').strip() + + @classmethod + def build_script(cls, name, *, + prescript=None, + import_first=False, + postscript=None, + postcleanup=False, + ): + if postcleanup is True: + postcleanup = cls.CLEANUP_SCRIPT + elif isinstance(postcleanup, str): + postcleanup = textwrap.dedent(postcleanup).strip() + postcleanup = cls.CLEANUP_SCRIPT + os.linesep + postcleanup + else: + postcleanup = '' + prescript = textwrap.dedent(prescript).strip() if prescript else '' + postscript = textwrap.dedent(postscript).strip() if postscript else '' + + if postcleanup: + if postscript: + postscript = postscript + os.linesep * 2 + postcleanup + else: + postscript = postcleanup + + if import_first: + prescript += textwrap.dedent(f''' + + # Now import the module. + assert name not in sys.modules + import {name}''') + + return cls.SCRIPT.format( + imports=cls.IMPORTS.strip(), + name=name, + prescript=prescript.strip(), + body=cls.SCRIPT_BODY.strip(), + postscript=postscript, + ) + + @classmethod + def parse(cls, text): + raw = json.loads(text) + mod = raw['module'] + mod['__spec__'] = types.SimpleNamespace(**mod['__spec__']) + raw['module'] = types.SimpleNamespace(**mod) + return cls(**raw) + + @classmethod + def from_subinterp(cls, name, interpid=None, *, pipe=None, **script_kwds): + if pipe is not None: + return cls._from_subinterp(name, interpid, pipe, script_kwds) + pipe = os.pipe() + try: + return cls._from_subinterp(name, interpid, pipe, script_kwds) + finally: + r, w = pipe + os.close(r) + os.close(w) + + @classmethod + def _from_subinterp(cls, name, interpid, pipe, script_kwargs): + r, w = pipe + + # Build the script. + postscript = textwrap.dedent(f''' + # Send the result over the pipe. + import json + import os + os.write({w}, json.dumps(snapshot).encode()) + + ''') + _postscript = script_kwargs.get('postscript') + if _postscript: + _postscript = textwrap.dedent(_postscript).lstrip() + postscript += _postscript + script_kwargs['postscript'] = postscript.strip() + script = cls.build_script(name, **script_kwargs) + + # Run the script. + if interpid is None: + ret = support.run_in_subinterp(script) + if ret != 0: + raise AssertionError(f'{ret} != 0') + else: + _interpreters.run_string(interpid, script) + + # Parse the results. + text = os.read(r, 1000) + return cls.parse(text.decode()) + + class LockTests(unittest.TestCase): """Very basic test of import lock functions.""" @@ -263,288 +428,6 @@ class ImportTests(unittest.TestCase): with self.assertRaises(ImportError): imp.load_dynamic('nonexistent', pathname) - @unittest.skip('known refleak (temporarily skipping)') - @requires_subinterpreters - @requires_load_dynamic - def test_singlephase_multiple_interpreters(self): - # Currently, for every single-phrase init module loaded - # in multiple interpreters, those interpreters share a - # PyModuleDef for that object, which can be a problem. - - # This single-phase module has global state, which is shared - # by the interpreters. - import _testsinglephase - name = _testsinglephase.__name__ - filename = _testsinglephase.__file__ - - del sys.modules[name] - _testsinglephase._clear_globals() - _testinternalcapi.clear_extension(name, filename) - init_count = _testsinglephase.initialized_count() - assert init_count == -1, (init_count,) - - def clean_up(): - _testsinglephase._clear_globals() - _testinternalcapi.clear_extension(name, filename) - self.addCleanup(clean_up) - - interp1 = _interpreters.create(isolated=False) - self.addCleanup(_interpreters.destroy, interp1) - interp2 = _interpreters.create(isolated=False) - self.addCleanup(_interpreters.destroy, interp2) - - script = textwrap.dedent(f''' - import _testsinglephase - - expected = %d - init_count = _testsinglephase.initialized_count() - if init_count != expected: - raise Exception(init_count) - - lookedup = _testsinglephase.look_up_self() - if lookedup is not _testsinglephase: - raise Exception((_testsinglephase, lookedup)) - - # Attrs set in the module init func are in m_copy. - _initialized = _testsinglephase._initialized - initialized = _testsinglephase.initialized() - if _initialized != initialized: - raise Exception((_initialized, initialized)) - - # Attrs set after loading are not in m_copy. - if hasattr(_testsinglephase, 'spam'): - raise Exception(_testsinglephase.spam) - _testsinglephase.spam = expected - ''') - - # Use an interpreter that gets destroyed right away. - ret = support.run_in_subinterp(script % 1) - self.assertEqual(ret, 0) - - # The module's init func gets run again. - # The module's globals did not get destroyed. - _interpreters.run_string(interp1, script % 2) - - # The module's init func is not run again. - # The second interpreter copies the module's m_copy. - # However, globals are still shared. - _interpreters.run_string(interp2, script % 2) - - @unittest.skip('known refleak (temporarily skipping)') - @requires_load_dynamic - def test_singlephase_variants(self): - # Exercise the most meaningful variants described in Python/import.c. - self.maxDiff = None - - basename = '_testsinglephase' - fileobj, pathname, _ = imp.find_module(basename) - fileobj.close() - - def clean_up(): - import _testsinglephase - _testsinglephase._clear_globals() - self.addCleanup(clean_up) - - def add_ext_cleanup(name): - def clean_up(): - _testinternalcapi.clear_extension(name, pathname) - self.addCleanup(clean_up) - - modules = {} - def load(name): - assert name not in modules - module = imp.load_dynamic(name, pathname) - self.assertNotIn(module, modules.values()) - modules[name] = module - return module - - def re_load(name, module): - assert sys.modules[name] is module - before = type(module)(module.__name__) - before.__dict__.update(vars(module)) - - reloaded = imp.load_dynamic(name, pathname) - - return before, reloaded - - def check_common(name, module): - summed = module.sum(1, 2) - lookedup = module.look_up_self() - initialized = module.initialized() - cached = sys.modules[name] - - # module.__name__ might not match, but the spec will. - self.assertEqual(module.__spec__.name, name) - if initialized is not None: - self.assertIsInstance(initialized, float) - self.assertGreater(initialized, 0) - self.assertEqual(summed, 3) - self.assertTrue(issubclass(module.error, Exception)) - self.assertEqual(module.int_const, 1969) - self.assertEqual(module.str_const, 'something different') - self.assertIs(cached, module) - - return lookedup, initialized, cached - - def check_direct(name, module, lookedup): - # The module has its own PyModuleDef, with a matching name. - self.assertEqual(module.__name__, name) - self.assertIs(lookedup, module) - - def check_indirect(name, module, lookedup, orig): - # The module re-uses another's PyModuleDef, with a different name. - assert orig is not module - assert orig.__name__ != name - self.assertNotEqual(module.__name__, name) - self.assertIs(lookedup, module) - - def check_basic(module, initialized): - init_count = module.initialized_count() - - self.assertIsNot(initialized, None) - self.assertIsInstance(init_count, int) - self.assertGreater(init_count, 0) - - return init_count - - def check_common_reloaded(name, module, cached, before, reloaded): - recached = sys.modules[name] - - self.assertEqual(reloaded.__spec__.name, name) - self.assertEqual(reloaded.__name__, before.__name__) - self.assertEqual(before.__dict__, module.__dict__) - self.assertIs(recached, reloaded) - - def check_basic_reloaded(module, lookedup, initialized, init_count, - before, reloaded): - relookedup = reloaded.look_up_self() - reinitialized = reloaded.initialized() - reinit_count = reloaded.initialized_count() - - self.assertIs(reloaded, module) - self.assertIs(reloaded.__dict__, module.__dict__) - # It only happens to be the same but that's good enough here. - # We really just want to verify that the re-loaded attrs - # didn't change. - self.assertIs(relookedup, lookedup) - self.assertEqual(reinitialized, initialized) - self.assertEqual(reinit_count, init_count) - - def check_with_reinit_reloaded(module, lookedup, initialized, - before, reloaded): - relookedup = reloaded.look_up_self() - reinitialized = reloaded.initialized() - - self.assertIsNot(reloaded, module) - self.assertIsNot(reloaded, module) - self.assertNotEqual(reloaded.__dict__, module.__dict__) - self.assertIs(relookedup, reloaded) - if initialized is None: - self.assertIs(reinitialized, None) - else: - self.assertGreater(reinitialized, initialized) - - # Check the "basic" module. - - name = basename - add_ext_cleanup(name) - expected_init_count = 1 - with self.subTest(name): - mod = load(name) - lookedup, initialized, cached = check_common(name, mod) - check_direct(name, mod, lookedup) - init_count = check_basic(mod, initialized) - self.assertEqual(init_count, expected_init_count) - - before, reloaded = re_load(name, mod) - check_common_reloaded(name, mod, cached, before, reloaded) - check_basic_reloaded(mod, lookedup, initialized, init_count, - before, reloaded) - basic = mod - - # Check its indirect variants. - - name = f'{basename}_basic_wrapper' - add_ext_cleanup(name) - expected_init_count += 1 - with self.subTest(name): - mod = load(name) - lookedup, initialized, cached = check_common(name, mod) - check_indirect(name, mod, lookedup, basic) - init_count = check_basic(mod, initialized) - self.assertEqual(init_count, expected_init_count) - - before, reloaded = re_load(name, mod) - check_common_reloaded(name, mod, cached, before, reloaded) - check_basic_reloaded(mod, lookedup, initialized, init_count, - before, reloaded) - - # Currently PyState_AddModule() always replaces the cached module. - self.assertIs(basic.look_up_self(), mod) - self.assertEqual(basic.initialized_count(), expected_init_count) - - # The cached module shouldn't be changed after this point. - basic_lookedup = mod - - # Check its direct variant. - - name = f'{basename}_basic_copy' - add_ext_cleanup(name) - expected_init_count += 1 - with self.subTest(name): - mod = load(name) - lookedup, initialized, cached = check_common(name, mod) - check_direct(name, mod, lookedup) - init_count = check_basic(mod, initialized) - self.assertEqual(init_count, expected_init_count) - - before, reloaded = re_load(name, mod) - check_common_reloaded(name, mod, cached, before, reloaded) - check_basic_reloaded(mod, lookedup, initialized, init_count, - before, reloaded) - - # This should change the cached module for _testsinglephase. - self.assertIs(basic.look_up_self(), basic_lookedup) - self.assertEqual(basic.initialized_count(), expected_init_count) - - # Check the non-basic variant that has no state. - - name = f'{basename}_with_reinit' - add_ext_cleanup(name) - with self.subTest(name): - mod = load(name) - lookedup, initialized, cached = check_common(name, mod) - self.assertIs(initialized, None) - check_direct(name, mod, lookedup) - - before, reloaded = re_load(name, mod) - check_common_reloaded(name, mod, cached, before, reloaded) - check_with_reinit_reloaded(mod, lookedup, initialized, - before, reloaded) - - # This should change the cached module for _testsinglephase. - self.assertIs(basic.look_up_self(), basic_lookedup) - self.assertEqual(basic.initialized_count(), expected_init_count) - - # Check the basic variant that has state. - - name = f'{basename}_with_state' - add_ext_cleanup(name) - with self.subTest(name): - mod = load(name) - lookedup, initialized, cached = check_common(name, mod) - self.assertIsNot(initialized, None) - check_direct(name, mod, lookedup) - - before, reloaded = re_load(name, mod) - check_common_reloaded(name, mod, cached, before, reloaded) - check_with_reinit_reloaded(mod, lookedup, initialized, - before, reloaded) - - # This should change the cached module for _testsinglephase. - self.assertIs(basic.look_up_self(), basic_lookedup) - self.assertEqual(basic.initialized_count(), expected_init_count) - @requires_load_dynamic def test_load_dynamic_ImportError_path(self): # Issue #1559549 added `name` and `path` attributes to ImportError @@ -737,6 +620,669 @@ class ImportTests(unittest.TestCase): check_get_builtins() +class TestSinglePhaseSnapshot(ModuleSnapshot): + + @classmethod + def from_module(cls, mod): + self = super().from_module(mod) + self.summed = mod.sum(1, 2) + self.lookedup = mod.look_up_self() + self.lookedup_id = id(self.lookedup) + self.state_initialized = mod.state_initialized() + if hasattr(mod, 'initialized_count'): + self.init_count = mod.initialized_count() + return self + + SCRIPT_BODY = ModuleSnapshot.SCRIPT_BODY + textwrap.dedent(f''' + snapshot['module'].update(dict( + int_const=mod.int_const, + str_const=mod.str_const, + _module_initialized=mod._module_initialized, + )) + snapshot.update(dict( + summed=mod.sum(1, 2), + lookedup_id=id(mod.look_up_self()), + state_initialized=mod.state_initialized(), + init_count=mod.initialized_count(), + has_spam=hasattr(mod, 'spam'), + spam=getattr(mod, 'spam', None), + )) + ''').rstrip() + + @classmethod + def parse(cls, text): + self = super().parse(text) + if not self.has_spam: + del self.spam + del self.has_spam + return self + + +@requires_load_dynamic +class SinglephaseInitTests(unittest.TestCase): + + NAME = '_testsinglephase' + + @classmethod + def setUpClass(cls): + if '-R' in sys.argv or '--huntrleaks' in sys.argv: + # https://github.com/python/cpython/issues/102251 + raise unittest.SkipTest('unresolved refleaks (see gh-102251)') + fileobj, filename, _ = imp.find_module(cls.NAME) + fileobj.close() + cls.FILE = filename + + # Start fresh. + cls.clean_up() + + def tearDown(self): + # Clean up the module. + self.clean_up() + + @classmethod + def clean_up(cls): + name = cls.NAME + filename = cls.FILE + if name in sys.modules: + if hasattr(sys.modules[name], '_clear_globals'): + assert sys.modules[name].__file__ == filename + sys.modules[name]._clear_globals() + del sys.modules[name] + # Clear all internally cached data for the extension. + _testinternalcapi.clear_extension(name, filename) + + ######################### + # helpers + + def add_module_cleanup(self, name): + def clean_up(): + # Clear all internally cached data for the extension. + _testinternalcapi.clear_extension(name, self.FILE) + self.addCleanup(clean_up) + + def load(self, name): + try: + already_loaded = self.already_loaded + except AttributeError: + already_loaded = self.already_loaded = {} + assert name not in already_loaded + mod = imp.load_dynamic(name, self.FILE) + self.assertNotIn(mod, already_loaded.values()) + already_loaded[name] = mod + return types.SimpleNamespace( + name=name, + module=mod, + snapshot=TestSinglePhaseSnapshot.from_module(mod), + ) + + def re_load(self, name, mod): + assert sys.modules[name] is mod + assert mod.__dict__ == mod.__dict__ + reloaded = imp.load_dynamic(name, self.FILE) + return types.SimpleNamespace( + name=name, + module=reloaded, + snapshot=TestSinglePhaseSnapshot.from_module(reloaded), + ) + + # subinterpreters + + def add_subinterpreter(self): + interpid = _interpreters.create(isolated=False) + _interpreters.run_string(interpid, textwrap.dedent(''' + import sys + import _testinternalcapi + ''')) + def clean_up(): + _interpreters.run_string(interpid, textwrap.dedent(f''' + name = {self.NAME!r} + if name in sys.modules: + sys.modules[name]._clear_globals() + _testinternalcapi.clear_extension(name, {self.FILE!r}) + ''')) + _interpreters.destroy(interpid) + self.addCleanup(clean_up) + return interpid + + def import_in_subinterp(self, interpid=None, *, + postscript=None, + postcleanup=False, + ): + name = self.NAME + + if postcleanup: + import_ = 'import _testinternalcapi' if interpid is None else '' + postcleanup = f''' + {import_} + mod._clear_globals() + _testinternalcapi.clear_extension(name, {self.FILE!r}) + ''' + + try: + pipe = self._pipe + except AttributeError: + r, w = pipe = self._pipe = os.pipe() + self.addCleanup(os.close, r) + self.addCleanup(os.close, w) + + snapshot = TestSinglePhaseSnapshot.from_subinterp( + name, + interpid, + pipe=pipe, + import_first=True, + postscript=postscript, + postcleanup=postcleanup, + ) + + return types.SimpleNamespace( + name=name, + module=None, + snapshot=snapshot, + ) + + # checks + + def check_common(self, loaded): + isolated = False + + mod = loaded.module + if not mod: + # It came from a subinterpreter. + isolated = True + mod = loaded.snapshot.module + # mod.__name__ might not match, but the spec will. + self.assertEqual(mod.__spec__.name, loaded.name) + self.assertEqual(mod.__file__, self.FILE) + self.assertEqual(mod.__spec__.origin, self.FILE) + if not isolated: + self.assertTrue(issubclass(mod.error, Exception)) + self.assertEqual(mod.int_const, 1969) + self.assertEqual(mod.str_const, 'something different') + self.assertIsInstance(mod._module_initialized, float) + self.assertGreater(mod._module_initialized, 0) + + snap = loaded.snapshot + self.assertEqual(snap.summed, 3) + if snap.state_initialized is not None: + self.assertIsInstance(snap.state_initialized, float) + self.assertGreater(snap.state_initialized, 0) + if isolated: + # The "looked up" module is interpreter-specific + # (interp->imports.modules_by_index was set for the module). + self.assertEqual(snap.lookedup_id, snap.id) + self.assertEqual(snap.cached_id, snap.id) + with self.assertRaises(AttributeError): + snap.spam + else: + self.assertIs(snap.lookedup, mod) + self.assertIs(snap.cached, mod) + + def check_direct(self, loaded): + # The module has its own PyModuleDef, with a matching name. + self.assertEqual(loaded.module.__name__, loaded.name) + self.assertIs(loaded.snapshot.lookedup, loaded.module) + + def check_indirect(self, loaded, orig): + # The module re-uses another's PyModuleDef, with a different name. + assert orig is not loaded.module + assert orig.__name__ != loaded.name + self.assertNotEqual(loaded.module.__name__, loaded.name) + self.assertIs(loaded.snapshot.lookedup, loaded.module) + + def check_basic(self, loaded, expected_init_count): + # m_size == -1 + # The module loads fresh the first time and copies m_copy after. + snap = loaded.snapshot + self.assertIsNot(snap.state_initialized, None) + self.assertIsInstance(snap.init_count, int) + self.assertGreater(snap.init_count, 0) + self.assertEqual(snap.init_count, expected_init_count) + + def check_with_reinit(self, loaded): + # m_size >= 0 + # The module loads fresh every time. + pass + + def check_fresh(self, loaded): + """ + The module had not been loaded before (at least since fully reset). + """ + snap = loaded.snapshot + # The module's init func was run. + # A copy of the module's __dict__ was stored in def->m_base.m_copy. + # The previous m_copy was deleted first. + # _PyRuntime.imports.extensions was set. + self.assertEqual(snap.init_count, 1) + # The global state was initialized. + # The module attrs were initialized from that state. + self.assertEqual(snap.module._module_initialized, + snap.state_initialized) + + def check_semi_fresh(self, loaded, base, prev): + """ + The module had been loaded before and then reset + (but the module global state wasn't). + """ + snap = loaded.snapshot + # The module's init func was run again. + # A copy of the module's __dict__ was stored in def->m_base.m_copy. + # The previous m_copy was deleted first. + # The module globals did not get reset. + self.assertNotEqual(snap.id, base.snapshot.id) + self.assertNotEqual(snap.id, prev.snapshot.id) + self.assertEqual(snap.init_count, prev.snapshot.init_count + 1) + # The global state was updated. + # The module attrs were initialized from that state. + self.assertEqual(snap.module._module_initialized, + snap.state_initialized) + self.assertNotEqual(snap.state_initialized, + base.snapshot.state_initialized) + self.assertNotEqual(snap.state_initialized, + prev.snapshot.state_initialized) + + def check_copied(self, loaded, base): + """ + The module had been loaded before and never reset. + """ + snap = loaded.snapshot + # The module's init func was not run again. + # The interpreter copied m_copy, as set by the other interpreter, + # with objects owned by the other interpreter. + # The module globals did not get reset. + self.assertNotEqual(snap.id, base.snapshot.id) + self.assertEqual(snap.init_count, base.snapshot.init_count) + # The global state was not updated since the init func did not run. + # The module attrs were not directly initialized from that state. + # The state and module attrs still match the previous loading. + self.assertEqual(snap.module._module_initialized, + snap.state_initialized) + self.assertEqual(snap.state_initialized, + base.snapshot.state_initialized) + + ######################### + # the tests + + def test_cleared_globals(self): + loaded = self.load(self.NAME) + _testsinglephase = loaded.module + init_before = _testsinglephase.state_initialized() + + _testsinglephase._clear_globals() + init_after = _testsinglephase.state_initialized() + init_count = _testsinglephase.initialized_count() + + self.assertGreater(init_before, 0) + self.assertEqual(init_after, 0) + self.assertEqual(init_count, -1) + + def test_variants(self): + # Exercise the most meaningful variants described in Python/import.c. + self.maxDiff = None + + # Check the "basic" module. + + name = self.NAME + expected_init_count = 1 + with self.subTest(name): + loaded = self.load(name) + + self.check_common(loaded) + self.check_direct(loaded) + self.check_basic(loaded, expected_init_count) + basic = loaded.module + + # Check its indirect variants. + + name = f'{self.NAME}_basic_wrapper' + self.add_module_cleanup(name) + expected_init_count += 1 + with self.subTest(name): + loaded = self.load(name) + + self.check_common(loaded) + self.check_indirect(loaded, basic) + self.check_basic(loaded, expected_init_count) + + # Currently PyState_AddModule() always replaces the cached module. + self.assertIs(basic.look_up_self(), loaded.module) + self.assertEqual(basic.initialized_count(), expected_init_count) + + # The cached module shouldn't change after this point. + basic_lookedup = loaded.module + + # Check its direct variant. + + name = f'{self.NAME}_basic_copy' + self.add_module_cleanup(name) + expected_init_count += 1 + with self.subTest(name): + loaded = self.load(name) + + self.check_common(loaded) + self.check_direct(loaded) + self.check_basic(loaded, expected_init_count) + + # This should change the cached module for _testsinglephase. + self.assertIs(basic.look_up_self(), basic_lookedup) + self.assertEqual(basic.initialized_count(), expected_init_count) + + # Check the non-basic variant that has no state. + + name = f'{self.NAME}_with_reinit' + self.add_module_cleanup(name) + with self.subTest(name): + loaded = self.load(name) + + self.check_common(loaded) + self.assertIs(loaded.snapshot.state_initialized, None) + self.check_direct(loaded) + self.check_with_reinit(loaded) + + # This should change the cached module for _testsinglephase. + self.assertIs(basic.look_up_self(), basic_lookedup) + self.assertEqual(basic.initialized_count(), expected_init_count) + + # Check the basic variant that has state. + + name = f'{self.NAME}_with_state' + self.add_module_cleanup(name) + with self.subTest(name): + loaded = self.load(name) + + self.check_common(loaded) + self.assertIsNot(loaded.snapshot.state_initialized, None) + self.check_direct(loaded) + self.check_with_reinit(loaded) + + # This should change the cached module for _testsinglephase. + self.assertIs(basic.look_up_self(), basic_lookedup) + self.assertEqual(basic.initialized_count(), expected_init_count) + + def test_basic_reloaded(self): + # m_copy is copied into the existing module object. + # Global state is not changed. + self.maxDiff = None + + for name in [ + self.NAME, # the "basic" module + f'{self.NAME}_basic_wrapper', # the indirect variant + f'{self.NAME}_basic_copy', # the direct variant + ]: + self.add_module_cleanup(name) + with self.subTest(name): + loaded = self.load(name) + reloaded = self.re_load(name, loaded.module) + + self.check_common(loaded) + self.check_common(reloaded) + + # Make sure the original __dict__ did not get replaced. + self.assertEqual(id(loaded.module.__dict__), + loaded.snapshot.ns_id) + self.assertEqual(loaded.snapshot.ns.__dict__, + loaded.module.__dict__) + + self.assertEqual(reloaded.module.__spec__.name, reloaded.name) + self.assertEqual(reloaded.module.__name__, + reloaded.snapshot.ns.__name__) + + self.assertIs(reloaded.module, loaded.module) + self.assertIs(reloaded.module.__dict__, loaded.module.__dict__) + # It only happens to be the same but that's good enough here. + # We really just want to verify that the re-loaded attrs + # didn't change. + self.assertIs(reloaded.snapshot.lookedup, + loaded.snapshot.lookedup) + self.assertEqual(reloaded.snapshot.state_initialized, + loaded.snapshot.state_initialized) + self.assertEqual(reloaded.snapshot.init_count, + loaded.snapshot.init_count) + + self.assertIs(reloaded.snapshot.cached, reloaded.module) + + def test_with_reinit_reloaded(self): + # The module's m_init func is run again. + self.maxDiff = None + + # Keep a reference around. + basic = self.load(self.NAME) + + for name in [ + f'{self.NAME}_with_reinit', # m_size == 0 + f'{self.NAME}_with_state', # m_size > 0 + ]: + self.add_module_cleanup(name) + with self.subTest(name): + loaded = self.load(name) + reloaded = self.re_load(name, loaded.module) + + self.check_common(loaded) + self.check_common(reloaded) + + # Make sure the original __dict__ did not get replaced. + self.assertEqual(id(loaded.module.__dict__), + loaded.snapshot.ns_id) + self.assertEqual(loaded.snapshot.ns.__dict__, + loaded.module.__dict__) + + self.assertEqual(reloaded.module.__spec__.name, reloaded.name) + self.assertEqual(reloaded.module.__name__, + reloaded.snapshot.ns.__name__) + + self.assertIsNot(reloaded.module, loaded.module) + self.assertNotEqual(reloaded.module.__dict__, + loaded.module.__dict__) + self.assertIs(reloaded.snapshot.lookedup, reloaded.module) + if loaded.snapshot.state_initialized is None: + self.assertIs(reloaded.snapshot.state_initialized, None) + else: + self.assertGreater(reloaded.snapshot.state_initialized, + loaded.snapshot.state_initialized) + + self.assertIs(reloaded.snapshot.cached, reloaded.module) + + # Currently, for every single-phrase init module loaded + # in multiple interpreters, those interpreters share a + # PyModuleDef for that object, which can be a problem. + # Also, we test with a single-phase module that has global state, + # which is shared by all interpreters. + + @requires_subinterpreters + def test_basic_multiple_interpreters_main_no_reset(self): + # without resetting; already loaded in main interpreter + + # At this point: + # * alive in 0 interpreters + # * module def may or may not be loaded already + # * module def not in _PyRuntime.imports.extensions + # * mod init func has not run yet (since reset, at least) + # * m_copy not set (hasn't been loaded yet or already cleared) + # * module's global state has not been initialized yet + # (or already cleared) + + main_loaded = self.load(self.NAME) + _testsinglephase = main_loaded.module + # Attrs set after loading are not in m_copy. + _testsinglephase.spam = 'spam, spam, spam, spam, eggs, and spam' + + self.check_common(main_loaded) + self.check_fresh(main_loaded) + + interpid1 = self.add_subinterpreter() + interpid2 = self.add_subinterpreter() + + # At this point: + # * alive in 1 interpreter (main) + # * module def in _PyRuntime.imports.extensions + # * mod init func ran for the first time (since reset, at least) + # * m_copy was copied from the main interpreter (was NULL) + # * module's global state was initialized + + # Use an interpreter that gets destroyed right away. + loaded = self.import_in_subinterp() + self.check_common(loaded) + self.check_copied(loaded, main_loaded) + + # At this point: + # * alive in 1 interpreter (main) + # * module def still in _PyRuntime.imports.extensions + # * mod init func ran again + # * m_copy is NULL (claered when the interpreter was destroyed) + # (was from main interpreter) + # * module's global state was updated, not reset + + # Use a subinterpreter that sticks around. + loaded = self.import_in_subinterp(interpid1) + self.check_common(loaded) + self.check_copied(loaded, main_loaded) + + # At this point: + # * alive in 2 interpreters (main, interp1) + # * module def still in _PyRuntime.imports.extensions + # * mod init func ran again + # * m_copy was copied from interp1 + # * module's global state was updated, not reset + + # Use a subinterpreter while the previous one is still alive. + loaded = self.import_in_subinterp(interpid2) + self.check_common(loaded) + self.check_copied(loaded, main_loaded) + + # At this point: + # * alive in 3 interpreters (main, interp1, interp2) + # * module def still in _PyRuntime.imports.extensions + # * mod init func ran again + # * m_copy was copied from interp2 (was from interp1) + # * module's global state was updated, not reset + + @requires_subinterpreters + def test_basic_multiple_interpreters_deleted_no_reset(self): + # without resetting; already loaded in a deleted interpreter + + # At this point: + # * alive in 0 interpreters + # * module def may or may not be loaded already + # * module def not in _PyRuntime.imports.extensions + # * mod init func has not run yet (since reset, at least) + # * m_copy not set (hasn't been loaded yet or already cleared) + # * module's global state has not been initialized yet + # (or already cleared) + + interpid1 = self.add_subinterpreter() + interpid2 = self.add_subinterpreter() + + # First, load in the main interpreter but then completely clear it. + loaded_main = self.load(self.NAME) + loaded_main.module._clear_globals() + _testinternalcapi.clear_extension(self.NAME, self.FILE) + + # At this point: + # * alive in 0 interpreters + # * module def loaded already + # * module def was in _PyRuntime.imports.extensions, but cleared + # * mod init func ran for the first time (since reset, at least) + # * m_copy was set, but cleared (was NULL) + # * module's global state was initialized but cleared + + # Start with an interpreter that gets destroyed right away. + base = self.import_in_subinterp(postscript=''' + # Attrs set after loading are not in m_copy. + mod.spam = 'spam, spam, mash, spam, eggs, and spam' + ''') + self.check_common(base) + self.check_fresh(base) + + # At this point: + # * alive in 0 interpreters + # * module def in _PyRuntime.imports.extensions + # * mod init func ran again + # * m_copy is NULL (claered when the interpreter was destroyed) + # * module's global state was initialized, not reset + + # Use a subinterpreter that sticks around. + loaded_interp1 = self.import_in_subinterp(interpid1) + self.check_common(loaded_interp1) + self.check_semi_fresh(loaded_interp1, loaded_main, base) + + # At this point: + # * alive in 1 interpreter (interp1) + # * module def still in _PyRuntime.imports.extensions + # * mod init func ran again + # * m_copy was copied from interp1 (was NULL) + # * module's global state was updated, not reset + + # Use a subinterpreter while the previous one is still alive. + loaded_interp2 = self.import_in_subinterp(interpid2) + self.check_common(loaded_interp2) + self.check_copied(loaded_interp2, loaded_interp1) + + # At this point: + # * alive in 2 interpreters (interp1, interp2) + # * module def still in _PyRuntime.imports.extensions + # * mod init func ran again + # * m_copy was copied from interp2 (was from interp1) + # * module's global state was updated, not reset + + @requires_subinterpreters + @requires_load_dynamic + def test_basic_multiple_interpreters_reset_each(self): + # resetting between each interpreter + + # At this point: + # * alive in 0 interpreters + # * module def may or may not be loaded already + # * module def not in _PyRuntime.imports.extensions + # * mod init func has not run yet (since reset, at least) + # * m_copy not set (hasn't been loaded yet or already cleared) + # * module's global state has not been initialized yet + # (or already cleared) + + interpid1 = self.add_subinterpreter() + interpid2 = self.add_subinterpreter() + + # Use an interpreter that gets destroyed right away. + loaded = self.import_in_subinterp( + postscript=''' + # Attrs set after loading are not in m_copy. + mod.spam = 'spam, spam, mash, spam, eggs, and spam' + ''', + postcleanup=True, + ) + self.check_common(loaded) + self.check_fresh(loaded) + + # At this point: + # * alive in 0 interpreters + # * module def in _PyRuntime.imports.extensions + # * mod init func ran for the first time (since reset, at least) + # * m_copy is NULL (claered when the interpreter was destroyed) + # * module's global state was initialized, not reset + + # Use a subinterpreter that sticks around. + loaded = self.import_in_subinterp(interpid1, postcleanup=True) + self.check_common(loaded) + self.check_fresh(loaded) + + # At this point: + # * alive in 1 interpreter (interp1) + # * module def still in _PyRuntime.imports.extensions + # * mod init func ran again + # * m_copy was copied from interp1 (was NULL) + # * module's global state was initialized, not reset + + # Use a subinterpreter while the previous one is still alive. + loaded = self.import_in_subinterp(interpid2, postcleanup=True) + self.check_common(loaded) + self.check_fresh(loaded) + + # At this point: + # * alive in 2 interpreters (interp2, interp2) + # * module def still in _PyRuntime.imports.extensions + # * mod init func ran again + # * m_copy was copied from interp2 (was from interp1) + # * module's global state was initialized, not reset + + class ReloadTests(unittest.TestCase): """Very basic tests to make sure that imp.reload() operates just like diff --git a/Modules/_testsinglephase.c b/Modules/_testsinglephase.c index 565221c887e..a16157702ae 100644 --- a/Modules/_testsinglephase.c +++ b/Modules/_testsinglephase.c @@ -140,7 +140,7 @@ init_module(PyObject *module, module_state *state) if (initialized == NULL) { return -1; } - if (PyModule_AddObjectRef(module, "_initialized", initialized) != 0) { + if (PyModule_AddObjectRef(module, "_module_initialized", initialized) != 0) { return -1; } @@ -148,13 +148,13 @@ init_module(PyObject *module, module_state *state) } -PyDoc_STRVAR(common_initialized_doc, -"initialized()\n\ +PyDoc_STRVAR(common_state_initialized_doc, +"state_initialized()\n\ \n\ -Return the seconds-since-epoch when the module was initialized."); +Return the seconds-since-epoch when the module state was initialized."); static PyObject * -common_initialized(PyObject *self, PyObject *Py_UNUSED(ignored)) +common_state_initialized(PyObject *self, PyObject *Py_UNUSED(ignored)) { module_state *state = get_module_state(self); if (state == NULL) { @@ -164,9 +164,9 @@ common_initialized(PyObject *self, PyObject *Py_UNUSED(ignored)) return PyFloat_FromDouble(d); } -#define INITIALIZED_METHODDEF \ - {"initialized", common_initialized, METH_NOARGS, \ - common_initialized_doc} +#define STATE_INITIALIZED_METHODDEF \ + {"state_initialized", common_state_initialized, METH_NOARGS, \ + common_state_initialized_doc} PyDoc_STRVAR(common_look_up_self_doc, @@ -265,7 +265,7 @@ basic__clear_globals(PyObject *self, PyObject *Py_UNUSED(ignored)) static PyMethodDef TestMethods_Basic[] = { LOOK_UP_SELF_METHODDEF, SUM_METHODDEF, - INITIALIZED_METHODDEF, + STATE_INITIALIZED_METHODDEF, INITIALIZED_COUNT_METHODDEF, _CLEAR_GLOBALS_METHODDEF, {NULL, NULL} /* sentinel */ @@ -360,7 +360,7 @@ PyInit__testsinglephase_basic_copy(void) static PyMethodDef TestMethods_Reinit[] = { LOOK_UP_SELF_METHODDEF, SUM_METHODDEF, - INITIALIZED_METHODDEF, + STATE_INITIALIZED_METHODDEF, {NULL, NULL} /* sentinel */ }; @@ -421,7 +421,7 @@ finally: static PyMethodDef TestMethods_WithState[] = { LOOK_UP_SELF_METHODDEF, SUM_METHODDEF, - INITIALIZED_METHODDEF, + STATE_INITIALIZED_METHODDEF, {NULL, NULL} /* sentinel */ }; diff --git a/Python/import.c b/Python/import.c index fabf03b1c5d..57d4eea1488 100644 --- a/Python/import.c +++ b/Python/import.c @@ -465,7 +465,7 @@ _modules_by_index_set(PyInterpreterState *interp, } static int -_modules_by_index_clear(PyInterpreterState *interp, PyModuleDef *def) +_modules_by_index_clear_one(PyInterpreterState *interp, PyModuleDef *def) { Py_ssize_t index = def->m_base.m_index; const char *err = _modules_by_index_check(interp, index); @@ -546,7 +546,7 @@ PyState_RemoveModule(PyModuleDef* def) "PyState_RemoveModule called on module with slots"); return -1; } - return _modules_by_index_clear(tstate->interp, def); + return _modules_by_index_clear_one(tstate->interp, def); } @@ -584,6 +584,109 @@ _PyImport_ClearModulesByIndex(PyInterpreterState *interp) /* extension modules */ /*********************/ +/* + It may help to have a big picture view of what happens + when an extension is loaded. This includes when it is imported + for the first time or via imp.load_dynamic(). + + Here's a summary, using imp.load_dynamic() as the starting point: + + 1. imp.load_dynamic() -> importlib._bootstrap._load() + 2. _load(): acquire import lock + 3. _load() -> importlib._bootstrap._load_unlocked() + 4. _load_unlocked() -> importlib._bootstrap.module_from_spec() + 5. module_from_spec() -> ExtensionFileLoader.create_module() + 6. create_module() -> _imp.create_dynamic() + (see below) + 7. module_from_spec() -> importlib._bootstrap._init_module_attrs() + 8. _load_unlocked(): sys.modules[name] = module + 9. _load_unlocked() -> ExtensionFileLoader.exec_module() + 10. exec_module() -> _imp.exec_dynamic() + (see below) + 11. _load(): release import lock + + + ...for single-phase init modules, where m_size == -1: + + (6). first time (not found in _PyRuntime.imports.extensions): + 1. _imp_create_dynamic_impl() -> import_find_extension() + 2. _imp_create_dynamic_impl() -> _PyImport_LoadDynamicModuleWithSpec() + 3. _PyImport_LoadDynamicModuleWithSpec(): load + 4. _PyImport_LoadDynamicModuleWithSpec(): call + 5. -> PyModule_Create() -> PyModule_Create2() -> PyModule_CreateInitialized() + 6. PyModule_CreateInitialized() -> PyModule_New() + 7. PyModule_CreateInitialized(): allocate mod->md_state + 8. PyModule_CreateInitialized() -> PyModule_AddFunctions() + 9. PyModule_CreateInitialized() -> PyModule_SetDocString() + 10. PyModule_CreateInitialized(): set mod->md_def + 11. : initialize the module + 12. _PyImport_LoadDynamicModuleWithSpec() -> _PyImport_CheckSubinterpIncompatibleExtensionAllowed() + 13. _PyImport_LoadDynamicModuleWithSpec(): set def->m_base.m_init + 14. _PyImport_LoadDynamicModuleWithSpec(): set __file__ + 15. _PyImport_LoadDynamicModuleWithSpec() -> _PyImport_FixupExtensionObject() + 16. _PyImport_FixupExtensionObject(): add it to interp->imports.modules_by_index + 17. _PyImport_FixupExtensionObject(): copy __dict__ into def->m_base.m_copy + 18. _PyImport_FixupExtensionObject(): add it to _PyRuntime.imports.extensions + + (6). subsequent times (found in _PyRuntime.imports.extensions): + 1. _imp_create_dynamic_impl() -> import_find_extension() + 2. import_find_extension() -> import_add_module() + 3. if name in sys.modules: use that module + 4. else: + 1. import_add_module() -> PyModule_NewObject() + 2. import_add_module(): set it on sys.modules + 5. import_find_extension(): copy the "m_copy" dict into __dict__ + 6. _imp_create_dynamic_impl() -> _PyImport_CheckSubinterpIncompatibleExtensionAllowed() + + (10). (every time): + 1. noop + + + ...for single-phase init modules, where m_size >= 0: + + (6). not main interpreter and never loaded there - every time (not found in _PyRuntime.imports.extensions): + 1-16. (same as for m_size == -1) + + (6). main interpreter - first time (not found in _PyRuntime.imports.extensions): + 1-16. (same as for m_size == -1) + 17. _PyImport_FixupExtensionObject(): add it to _PyRuntime.imports.extensions + + (6). previously loaded in main interpreter (found in _PyRuntime.imports.extensions): + 1. _imp_create_dynamic_impl() -> import_find_extension() + 2. import_find_extension(): call def->m_base.m_init + 3. import_find_extension(): add the module to sys.modules + + (10). every time: + 1. noop + + + ...for multi-phase init modules: + + (6). every time: + 1. _imp_create_dynamic_impl() -> import_find_extension() (not found) + 2. _imp_create_dynamic_impl() -> _PyImport_LoadDynamicModuleWithSpec() + 3. _PyImport_LoadDynamicModuleWithSpec(): load module init func + 4. _PyImport_LoadDynamicModuleWithSpec(): call module init func + 5. _PyImport_LoadDynamicModuleWithSpec() -> PyModule_FromDefAndSpec() + 6. PyModule_FromDefAndSpec(): gather/check moduledef slots + 7. if there's a Py_mod_create slot: + 1. PyModule_FromDefAndSpec(): call its function + 8. else: + 1. PyModule_FromDefAndSpec() -> PyModule_NewObject() + 9: PyModule_FromDefAndSpec(): set mod->md_def + 10. PyModule_FromDefAndSpec() -> _add_methods_to_object() + 11. PyModule_FromDefAndSpec() -> PyModule_SetDocString() + + (10). every time: + 1. _imp_exec_dynamic_impl() -> exec_builtin_or_dynamic() + 2. if mod->md_state == NULL (including if m_size == 0): + 1. exec_builtin_or_dynamic() -> PyModule_ExecDef() + 2. PyModule_ExecDef(): allocate mod->md_state + 3. if there's a Py_mod_exec slot: + 1. PyModule_ExecDef(): call its function + */ + + /* Make sure name is fully qualified. This is a bit of a hack: when the shared library is loaded, @@ -1007,13 +1110,17 @@ clear_singlephase_extension(PyInterpreterState *interp, /* Clear the PyState_*Module() cache entry. */ if (_modules_by_index_check(interp, def->m_base.m_index) == NULL) { - if (_modules_by_index_clear(interp, def) < 0) { + if (_modules_by_index_clear_one(interp, def) < 0) { return -1; } } /* Clear the cached module def. */ - return _extensions_cache_delete(filename, name); + if (_extensions_cache_delete(filename, name) < 0) { + return -1; + } + + return 0; }