diff --git a/Lib/imp.py b/Lib/imp.py index fc42c157658..fe850f6a001 100644 --- a/Lib/imp.py +++ b/Lib/imp.py @@ -338,8 +338,8 @@ if create_dynamic: # Issue #24748: Skip the sys.modules check in _load_module_shim; # always load new extension - spec = importlib.machinery.ModuleSpec( - name=name, loader=loader, origin=path) + spec = importlib.util.spec_from_file_location( + name, path, loader=loader) return _load(spec) else: diff --git a/Lib/test/test_imp.py b/Lib/test/test_imp.py index 2292bb20939..03e3adba221 100644 --- a/Lib/test/test_imp.py +++ b/Lib/test/test_imp.py @@ -1,4 +1,5 @@ import gc +import json import importlib import importlib.util import os @@ -11,6 +12,7 @@ from test.support import os_helper from test.support import script_helper from test.support import warnings_helper import textwrap +import types import unittest import warnings imp = warnings_helper.import_deprecated('imp') @@ -39,6 +41,169 @@ def requires_load_dynamic(meth): 'imp.load_dynamic() required')(meth) +class ModuleSnapshot(types.SimpleNamespace): + """A representation of a module for testing. + + Fields: + + * id - the module's object ID + * module - the actual module or an adequate substitute + * __file__ + * __spec__ + * name + * origin + * ns - a copy (dict) of the module's __dict__ (or None) + * ns_id - the object ID of the module's __dict__ + * cached - the sys.modules[mod.__spec__.name] entry (or None) + * cached_id - the object ID of the sys.modules entry (or None) + + In cases where the value is not available (e.g. due to serialization), + the value will be None. + """ + _fields = tuple('id module ns ns_id cached cached_id'.split()) + + @classmethod + def from_module(cls, mod): + name = mod.__spec__.name + cached = sys.modules.get(name) + return cls( + id=id(mod), + module=mod, + ns=types.SimpleNamespace(**mod.__dict__), + ns_id=id(mod.__dict__), + cached=cached, + cached_id=id(cached), + ) + + SCRIPT = textwrap.dedent(''' + {imports} + + name = {name!r} + + {prescript} + + mod = {name} + + {body} + + {postscript} + ''') + IMPORTS = textwrap.dedent(''' + import sys + ''').strip() + SCRIPT_BODY = textwrap.dedent(''' + # Capture the snapshot data. + cached = sys.modules.get(name) + snapshot = dict( + id=id(mod), + module=dict( + __file__=mod.__file__, + __spec__=dict( + name=mod.__spec__.name, + origin=mod.__spec__.origin, + ), + ), + ns=None, + ns_id=id(mod.__dict__), + cached=None, + cached_id=id(cached) if cached else None, + ) + ''').strip() + CLEANUP_SCRIPT = textwrap.dedent(''' + # Clean up the module. + sys.modules.pop(name, None) + ''').strip() + + @classmethod + def build_script(cls, name, *, + prescript=None, + import_first=False, + postscript=None, + postcleanup=False, + ): + if postcleanup is True: + postcleanup = cls.CLEANUP_SCRIPT + elif isinstance(postcleanup, str): + postcleanup = textwrap.dedent(postcleanup).strip() + postcleanup = cls.CLEANUP_SCRIPT + os.linesep + postcleanup + else: + postcleanup = '' + prescript = textwrap.dedent(prescript).strip() if prescript else '' + postscript = textwrap.dedent(postscript).strip() if postscript else '' + + if postcleanup: + if postscript: + postscript = postscript + os.linesep * 2 + postcleanup + else: + postscript = postcleanup + + if import_first: + prescript += textwrap.dedent(f''' + + # Now import the module. + assert name not in sys.modules + import {name}''') + + return cls.SCRIPT.format( + imports=cls.IMPORTS.strip(), + name=name, + prescript=prescript.strip(), + body=cls.SCRIPT_BODY.strip(), + postscript=postscript, + ) + + @classmethod + def parse(cls, text): + raw = json.loads(text) + mod = raw['module'] + mod['__spec__'] = types.SimpleNamespace(**mod['__spec__']) + raw['module'] = types.SimpleNamespace(**mod) + return cls(**raw) + + @classmethod + def from_subinterp(cls, name, interpid=None, *, pipe=None, **script_kwds): + if pipe is not None: + return cls._from_subinterp(name, interpid, pipe, script_kwds) + pipe = os.pipe() + try: + return cls._from_subinterp(name, interpid, pipe, script_kwds) + finally: + r, w = pipe + os.close(r) + os.close(w) + + @classmethod + def _from_subinterp(cls, name, interpid, pipe, script_kwargs): + r, w = pipe + + # Build the script. + postscript = textwrap.dedent(f''' + # Send the result over the pipe. + import json + import os + os.write({w}, json.dumps(snapshot).encode()) + + ''') + _postscript = script_kwargs.get('postscript') + if _postscript: + _postscript = textwrap.dedent(_postscript).lstrip() + postscript += _postscript + script_kwargs['postscript'] = postscript.strip() + script = cls.build_script(name, **script_kwargs) + + # Run the script. + if interpid is None: + ret = support.run_in_subinterp(script) + if ret != 0: + raise AssertionError(f'{ret} != 0') + else: + _interpreters.run_string(interpid, script) + + # Parse the results. + text = os.read(r, 1000) + return cls.parse(text.decode()) + + class LockTests(unittest.TestCase): """Very basic test of import lock functions.""" @@ -263,288 +428,6 @@ class ImportTests(unittest.TestCase): with self.assertRaises(ImportError): imp.load_dynamic('nonexistent', pathname) - @unittest.skip('known refleak (temporarily skipping)') - @requires_subinterpreters - @requires_load_dynamic - def test_singlephase_multiple_interpreters(self): - # Currently, for every single-phrase init module loaded - # in multiple interpreters, those interpreters share a - # PyModuleDef for that object, which can be a problem. - - # This single-phase module has global state, which is shared - # by the interpreters. - import _testsinglephase - name = _testsinglephase.__name__ - filename = _testsinglephase.__file__ - - del sys.modules[name] - _testsinglephase._clear_globals() - _testinternalcapi.clear_extension(name, filename) - init_count = _testsinglephase.initialized_count() - assert init_count == -1, (init_count,) - - def clean_up(): - _testsinglephase._clear_globals() - _testinternalcapi.clear_extension(name, filename) - self.addCleanup(clean_up) - - interp1 = _interpreters.create(isolated=False) - self.addCleanup(_interpreters.destroy, interp1) - interp2 = _interpreters.create(isolated=False) - self.addCleanup(_interpreters.destroy, interp2) - - script = textwrap.dedent(f''' - import _testsinglephase - - expected = %d - init_count = _testsinglephase.initialized_count() - if init_count != expected: - raise Exception(init_count) - - lookedup = _testsinglephase.look_up_self() - if lookedup is not _testsinglephase: - raise Exception((_testsinglephase, lookedup)) - - # Attrs set in the module init func are in m_copy. - _initialized = _testsinglephase._initialized - initialized = _testsinglephase.initialized() - if _initialized != initialized: - raise Exception((_initialized, initialized)) - - # Attrs set after loading are not in m_copy. - if hasattr(_testsinglephase, 'spam'): - raise Exception(_testsinglephase.spam) - _testsinglephase.spam = expected - ''') - - # Use an interpreter that gets destroyed right away. - ret = support.run_in_subinterp(script % 1) - self.assertEqual(ret, 0) - - # The module's init func gets run again. - # The module's globals did not get destroyed. - _interpreters.run_string(interp1, script % 2) - - # The module's init func is not run again. - # The second interpreter copies the module's m_copy. - # However, globals are still shared. - _interpreters.run_string(interp2, script % 2) - - @unittest.skip('known refleak (temporarily skipping)') - @requires_load_dynamic - def test_singlephase_variants(self): - # Exercise the most meaningful variants described in Python/import.c. - self.maxDiff = None - - basename = '_testsinglephase' - fileobj, pathname, _ = imp.find_module(basename) - fileobj.close() - - def clean_up(): - import _testsinglephase - _testsinglephase._clear_globals() - self.addCleanup(clean_up) - - def add_ext_cleanup(name): - def clean_up(): - _testinternalcapi.clear_extension(name, pathname) - self.addCleanup(clean_up) - - modules = {} - def load(name): - assert name not in modules - module = imp.load_dynamic(name, pathname) - self.assertNotIn(module, modules.values()) - modules[name] = module - return module - - def re_load(name, module): - assert sys.modules[name] is module - before = type(module)(module.__name__) - before.__dict__.update(vars(module)) - - reloaded = imp.load_dynamic(name, pathname) - - return before, reloaded - - def check_common(name, module): - summed = module.sum(1, 2) - lookedup = module.look_up_self() - initialized = module.initialized() - cached = sys.modules[name] - - # module.__name__ might not match, but the spec will. - self.assertEqual(module.__spec__.name, name) - if initialized is not None: - self.assertIsInstance(initialized, float) - self.assertGreater(initialized, 0) - self.assertEqual(summed, 3) - self.assertTrue(issubclass(module.error, Exception)) - self.assertEqual(module.int_const, 1969) - self.assertEqual(module.str_const, 'something different') - self.assertIs(cached, module) - - return lookedup, initialized, cached - - def check_direct(name, module, lookedup): - # The module has its own PyModuleDef, with a matching name. - self.assertEqual(module.__name__, name) - self.assertIs(lookedup, module) - - def check_indirect(name, module, lookedup, orig): - # The module re-uses another's PyModuleDef, with a different name. - assert orig is not module - assert orig.__name__ != name - self.assertNotEqual(module.__name__, name) - self.assertIs(lookedup, module) - - def check_basic(module, initialized): - init_count = module.initialized_count() - - self.assertIsNot(initialized, None) - self.assertIsInstance(init_count, int) - self.assertGreater(init_count, 0) - - return init_count - - def check_common_reloaded(name, module, cached, before, reloaded): - recached = sys.modules[name] - - self.assertEqual(reloaded.__spec__.name, name) - self.assertEqual(reloaded.__name__, before.__name__) - self.assertEqual(before.__dict__, module.__dict__) - self.assertIs(recached, reloaded) - - def check_basic_reloaded(module, lookedup, initialized, init_count, - before, reloaded): - relookedup = reloaded.look_up_self() - reinitialized = reloaded.initialized() - reinit_count = reloaded.initialized_count() - - self.assertIs(reloaded, module) - self.assertIs(reloaded.__dict__, module.__dict__) - # It only happens to be the same but that's good enough here. - # We really just want to verify that the re-loaded attrs - # didn't change. - self.assertIs(relookedup, lookedup) - self.assertEqual(reinitialized, initialized) - self.assertEqual(reinit_count, init_count) - - def check_with_reinit_reloaded(module, lookedup, initialized, - before, reloaded): - relookedup = reloaded.look_up_self() - reinitialized = reloaded.initialized() - - self.assertIsNot(reloaded, module) - self.assertIsNot(reloaded, module) - self.assertNotEqual(reloaded.__dict__, module.__dict__) - self.assertIs(relookedup, reloaded) - if initialized is None: - self.assertIs(reinitialized, None) - else: - self.assertGreater(reinitialized, initialized) - - # Check the "basic" module. - - name = basename - add_ext_cleanup(name) - expected_init_count = 1 - with self.subTest(name): - mod = load(name) - lookedup, initialized, cached = check_common(name, mod) - check_direct(name, mod, lookedup) - init_count = check_basic(mod, initialized) - self.assertEqual(init_count, expected_init_count) - - before, reloaded = re_load(name, mod) - check_common_reloaded(name, mod, cached, before, reloaded) - check_basic_reloaded(mod, lookedup, initialized, init_count, - before, reloaded) - basic = mod - - # Check its indirect variants. - - name = f'{basename}_basic_wrapper' - add_ext_cleanup(name) - expected_init_count += 1 - with self.subTest(name): - mod = load(name) - lookedup, initialized, cached = check_common(name, mod) - check_indirect(name, mod, lookedup, basic) - init_count = check_basic(mod, initialized) - self.assertEqual(init_count, expected_init_count) - - before, reloaded = re_load(name, mod) - check_common_reloaded(name, mod, cached, before, reloaded) - check_basic_reloaded(mod, lookedup, initialized, init_count, - before, reloaded) - - # Currently PyState_AddModule() always replaces the cached module. - self.assertIs(basic.look_up_self(), mod) - self.assertEqual(basic.initialized_count(), expected_init_count) - - # The cached module shouldn't be changed after this point. - basic_lookedup = mod - - # Check its direct variant. - - name = f'{basename}_basic_copy' - add_ext_cleanup(name) - expected_init_count += 1 - with self.subTest(name): - mod = load(name) - lookedup, initialized, cached = check_common(name, mod) - check_direct(name, mod, lookedup) - init_count = check_basic(mod, initialized) - self.assertEqual(init_count, expected_init_count) - - before, reloaded = re_load(name, mod) - check_common_reloaded(name, mod, cached, before, reloaded) - check_basic_reloaded(mod, lookedup, initialized, init_count, - before, reloaded) - - # This should change the cached module for _testsinglephase. - self.assertIs(basic.look_up_self(), basic_lookedup) - self.assertEqual(basic.initialized_count(), expected_init_count) - - # Check the non-basic variant that has no state. - - name = f'{basename}_with_reinit' - add_ext_cleanup(name) - with self.subTest(name): - mod = load(name) - lookedup, initialized, cached = check_common(name, mod) - self.assertIs(initialized, None) - check_direct(name, mod, lookedup) - - before, reloaded = re_load(name, mod) - check_common_reloaded(name, mod, cached, before, reloaded) - check_with_reinit_reloaded(mod, lookedup, initialized, - before, reloaded) - - # This should change the cached module for _testsinglephase. - self.assertIs(basic.look_up_self(), basic_lookedup) - self.assertEqual(basic.initialized_count(), expected_init_count) - - # Check the basic variant that has state. - - name = f'{basename}_with_state' - add_ext_cleanup(name) - with self.subTest(name): - mod = load(name) - lookedup, initialized, cached = check_common(name, mod) - self.assertIsNot(initialized, None) - check_direct(name, mod, lookedup) - - before, reloaded = re_load(name, mod) - check_common_reloaded(name, mod, cached, before, reloaded) - check_with_reinit_reloaded(mod, lookedup, initialized, - before, reloaded) - - # This should change the cached module for _testsinglephase. - self.assertIs(basic.look_up_self(), basic_lookedup) - self.assertEqual(basic.initialized_count(), expected_init_count) - @requires_load_dynamic def test_load_dynamic_ImportError_path(self): # Issue #1559549 added `name` and `path` attributes to ImportError @@ -737,6 +620,669 @@ class ImportTests(unittest.TestCase): check_get_builtins() +class TestSinglePhaseSnapshot(ModuleSnapshot): + + @classmethod + def from_module(cls, mod): + self = super().from_module(mod) + self.summed = mod.sum(1, 2) + self.lookedup = mod.look_up_self() + self.lookedup_id = id(self.lookedup) + self.state_initialized = mod.state_initialized() + if hasattr(mod, 'initialized_count'): + self.init_count = mod.initialized_count() + return self + + SCRIPT_BODY = ModuleSnapshot.SCRIPT_BODY + textwrap.dedent(f''' + snapshot['module'].update(dict( + int_const=mod.int_const, + str_const=mod.str_const, + _module_initialized=mod._module_initialized, + )) + snapshot.update(dict( + summed=mod.sum(1, 2), + lookedup_id=id(mod.look_up_self()), + state_initialized=mod.state_initialized(), + init_count=mod.initialized_count(), + has_spam=hasattr(mod, 'spam'), + spam=getattr(mod, 'spam', None), + )) + ''').rstrip() + + @classmethod + def parse(cls, text): + self = super().parse(text) + if not self.has_spam: + del self.spam + del self.has_spam + return self + + +@requires_load_dynamic +class SinglephaseInitTests(unittest.TestCase): + + NAME = '_testsinglephase' + + @classmethod + def setUpClass(cls): + if '-R' in sys.argv or '--huntrleaks' in sys.argv: + # https://github.com/python/cpython/issues/102251 + raise unittest.SkipTest('unresolved refleaks (see gh-102251)') + fileobj, filename, _ = imp.find_module(cls.NAME) + fileobj.close() + cls.FILE = filename + + # Start fresh. + cls.clean_up() + + def tearDown(self): + # Clean up the module. + self.clean_up() + + @classmethod + def clean_up(cls): + name = cls.NAME + filename = cls.FILE + if name in sys.modules: + if hasattr(sys.modules[name], '_clear_globals'): + assert sys.modules[name].__file__ == filename + sys.modules[name]._clear_globals() + del sys.modules[name] + # Clear all internally cached data for the extension. + _testinternalcapi.clear_extension(name, filename) + + ######################### + # helpers + + def add_module_cleanup(self, name): + def clean_up(): + # Clear all internally cached data for the extension. + _testinternalcapi.clear_extension(name, self.FILE) + self.addCleanup(clean_up) + + def load(self, name): + try: + already_loaded = self.already_loaded + except AttributeError: + already_loaded = self.already_loaded = {} + assert name not in already_loaded + mod = imp.load_dynamic(name, self.FILE) + self.assertNotIn(mod, already_loaded.values()) + already_loaded[name] = mod + return types.SimpleNamespace( + name=name, + module=mod, + snapshot=TestSinglePhaseSnapshot.from_module(mod), + ) + + def re_load(self, name, mod): + assert sys.modules[name] is mod + assert mod.__dict__ == mod.__dict__ + reloaded = imp.load_dynamic(name, self.FILE) + return types.SimpleNamespace( + name=name, + module=reloaded, + snapshot=TestSinglePhaseSnapshot.from_module(reloaded), + ) + + # subinterpreters + + def add_subinterpreter(self): + interpid = _interpreters.create(isolated=False) + _interpreters.run_string(interpid, textwrap.dedent(''' + import sys + import _testinternalcapi + ''')) + def clean_up(): + _interpreters.run_string(interpid, textwrap.dedent(f''' + name = {self.NAME!r} + if name in sys.modules: + sys.modules[name]._clear_globals() + _testinternalcapi.clear_extension(name, {self.FILE!r}) + ''')) + _interpreters.destroy(interpid) + self.addCleanup(clean_up) + return interpid + + def import_in_subinterp(self, interpid=None, *, + postscript=None, + postcleanup=False, + ): + name = self.NAME + + if postcleanup: + import_ = 'import _testinternalcapi' if interpid is None else '' + postcleanup = f''' + {import_} + mod._clear_globals() + _testinternalcapi.clear_extension(name, {self.FILE!r}) + ''' + + try: + pipe = self._pipe + except AttributeError: + r, w = pipe = self._pipe = os.pipe() + self.addCleanup(os.close, r) + self.addCleanup(os.close, w) + + snapshot = TestSinglePhaseSnapshot.from_subinterp( + name, + interpid, + pipe=pipe, + import_first=True, + postscript=postscript, + postcleanup=postcleanup, + ) + + return types.SimpleNamespace( + name=name, + module=None, + snapshot=snapshot, + ) + + # checks + + def check_common(self, loaded): + isolated = False + + mod = loaded.module + if not mod: + # It came from a subinterpreter. + isolated = True + mod = loaded.snapshot.module + # mod.__name__ might not match, but the spec will. + self.assertEqual(mod.__spec__.name, loaded.name) + self.assertEqual(mod.__file__, self.FILE) + self.assertEqual(mod.__spec__.origin, self.FILE) + if not isolated: + self.assertTrue(issubclass(mod.error, Exception)) + self.assertEqual(mod.int_const, 1969) + self.assertEqual(mod.str_const, 'something different') + self.assertIsInstance(mod._module_initialized, float) + self.assertGreater(mod._module_initialized, 0) + + snap = loaded.snapshot + self.assertEqual(snap.summed, 3) + if snap.state_initialized is not None: + self.assertIsInstance(snap.state_initialized, float) + self.assertGreater(snap.state_initialized, 0) + if isolated: + # The "looked up" module is interpreter-specific + # (interp->imports.modules_by_index was set for the module). + self.assertEqual(snap.lookedup_id, snap.id) + self.assertEqual(snap.cached_id, snap.id) + with self.assertRaises(AttributeError): + snap.spam + else: + self.assertIs(snap.lookedup, mod) + self.assertIs(snap.cached, mod) + + def check_direct(self, loaded): + # The module has its own PyModuleDef, with a matching name. + self.assertEqual(loaded.module.__name__, loaded.name) + self.assertIs(loaded.snapshot.lookedup, loaded.module) + + def check_indirect(self, loaded, orig): + # The module re-uses another's PyModuleDef, with a different name. + assert orig is not loaded.module + assert orig.__name__ != loaded.name + self.assertNotEqual(loaded.module.__name__, loaded.name) + self.assertIs(loaded.snapshot.lookedup, loaded.module) + + def check_basic(self, loaded, expected_init_count): + # m_size == -1 + # The module loads fresh the first time and copies m_copy after. + snap = loaded.snapshot + self.assertIsNot(snap.state_initialized, None) + self.assertIsInstance(snap.init_count, int) + self.assertGreater(snap.init_count, 0) + self.assertEqual(snap.init_count, expected_init_count) + + def check_with_reinit(self, loaded): + # m_size >= 0 + # The module loads fresh every time. + pass + + def check_fresh(self, loaded): + """ + The module had not been loaded before (at least since fully reset). + """ + snap = loaded.snapshot + # The module's init func was run. + # A copy of the module's __dict__ was stored in def->m_base.m_copy. + # The previous m_copy was deleted first. + # _PyRuntime.imports.extensions was set. + self.assertEqual(snap.init_count, 1) + # The global state was initialized. + # The module attrs were initialized from that state. + self.assertEqual(snap.module._module_initialized, + snap.state_initialized) + + def check_semi_fresh(self, loaded, base, prev): + """ + The module had been loaded before and then reset + (but the module global state wasn't). + """ + snap = loaded.snapshot + # The module's init func was run again. + # A copy of the module's __dict__ was stored in def->m_base.m_copy. + # The previous m_copy was deleted first. + # The module globals did not get reset. + self.assertNotEqual(snap.id, base.snapshot.id) + self.assertNotEqual(snap.id, prev.snapshot.id) + self.assertEqual(snap.init_count, prev.snapshot.init_count + 1) + # The global state was updated. + # The module attrs were initialized from that state. + self.assertEqual(snap.module._module_initialized, + snap.state_initialized) + self.assertNotEqual(snap.state_initialized, + base.snapshot.state_initialized) + self.assertNotEqual(snap.state_initialized, + prev.snapshot.state_initialized) + + def check_copied(self, loaded, base): + """ + The module had been loaded before and never reset. + """ + snap = loaded.snapshot + # The module's init func was not run again. + # The interpreter copied m_copy, as set by the other interpreter, + # with objects owned by the other interpreter. + # The module globals did not get reset. + self.assertNotEqual(snap.id, base.snapshot.id) + self.assertEqual(snap.init_count, base.snapshot.init_count) + # The global state was not updated since the init func did not run. + # The module attrs were not directly initialized from that state. + # The state and module attrs still match the previous loading. + self.assertEqual(snap.module._module_initialized, + snap.state_initialized) + self.assertEqual(snap.state_initialized, + base.snapshot.state_initialized) + + ######################### + # the tests + + def test_cleared_globals(self): + loaded = self.load(self.NAME) + _testsinglephase = loaded.module + init_before = _testsinglephase.state_initialized() + + _testsinglephase._clear_globals() + init_after = _testsinglephase.state_initialized() + init_count = _testsinglephase.initialized_count() + + self.assertGreater(init_before, 0) + self.assertEqual(init_after, 0) + self.assertEqual(init_count, -1) + + def test_variants(self): + # Exercise the most meaningful variants described in Python/import.c. + self.maxDiff = None + + # Check the "basic" module. + + name = self.NAME + expected_init_count = 1 + with self.subTest(name): + loaded = self.load(name) + + self.check_common(loaded) + self.check_direct(loaded) + self.check_basic(loaded, expected_init_count) + basic = loaded.module + + # Check its indirect variants. + + name = f'{self.NAME}_basic_wrapper' + self.add_module_cleanup(name) + expected_init_count += 1 + with self.subTest(name): + loaded = self.load(name) + + self.check_common(loaded) + self.check_indirect(loaded, basic) + self.check_basic(loaded, expected_init_count) + + # Currently PyState_AddModule() always replaces the cached module. + self.assertIs(basic.look_up_self(), loaded.module) + self.assertEqual(basic.initialized_count(), expected_init_count) + + # The cached module shouldn't change after this point. + basic_lookedup = loaded.module + + # Check its direct variant. + + name = f'{self.NAME}_basic_copy' + self.add_module_cleanup(name) + expected_init_count += 1 + with self.subTest(name): + loaded = self.load(name) + + self.check_common(loaded) + self.check_direct(loaded) + self.check_basic(loaded, expected_init_count) + + # This should change the cached module for _testsinglephase. + self.assertIs(basic.look_up_self(), basic_lookedup) + self.assertEqual(basic.initialized_count(), expected_init_count) + + # Check the non-basic variant that has no state. + + name = f'{self.NAME}_with_reinit' + self.add_module_cleanup(name) + with self.subTest(name): + loaded = self.load(name) + + self.check_common(loaded) + self.assertIs(loaded.snapshot.state_initialized, None) + self.check_direct(loaded) + self.check_with_reinit(loaded) + + # This should change the cached module for _testsinglephase. + self.assertIs(basic.look_up_self(), basic_lookedup) + self.assertEqual(basic.initialized_count(), expected_init_count) + + # Check the basic variant that has state. + + name = f'{self.NAME}_with_state' + self.add_module_cleanup(name) + with self.subTest(name): + loaded = self.load(name) + + self.check_common(loaded) + self.assertIsNot(loaded.snapshot.state_initialized, None) + self.check_direct(loaded) + self.check_with_reinit(loaded) + + # This should change the cached module for _testsinglephase. + self.assertIs(basic.look_up_self(), basic_lookedup) + self.assertEqual(basic.initialized_count(), expected_init_count) + + def test_basic_reloaded(self): + # m_copy is copied into the existing module object. + # Global state is not changed. + self.maxDiff = None + + for name in [ + self.NAME, # the "basic" module + f'{self.NAME}_basic_wrapper', # the indirect variant + f'{self.NAME}_basic_copy', # the direct variant + ]: + self.add_module_cleanup(name) + with self.subTest(name): + loaded = self.load(name) + reloaded = self.re_load(name, loaded.module) + + self.check_common(loaded) + self.check_common(reloaded) + + # Make sure the original __dict__ did not get replaced. + self.assertEqual(id(loaded.module.__dict__), + loaded.snapshot.ns_id) + self.assertEqual(loaded.snapshot.ns.__dict__, + loaded.module.__dict__) + + self.assertEqual(reloaded.module.__spec__.name, reloaded.name) + self.assertEqual(reloaded.module.__name__, + reloaded.snapshot.ns.__name__) + + self.assertIs(reloaded.module, loaded.module) + self.assertIs(reloaded.module.__dict__, loaded.module.__dict__) + # It only happens to be the same but that's good enough here. + # We really just want to verify that the re-loaded attrs + # didn't change. + self.assertIs(reloaded.snapshot.lookedup, + loaded.snapshot.lookedup) + self.assertEqual(reloaded.snapshot.state_initialized, + loaded.snapshot.state_initialized) + self.assertEqual(reloaded.snapshot.init_count, + loaded.snapshot.init_count) + + self.assertIs(reloaded.snapshot.cached, reloaded.module) + + def test_with_reinit_reloaded(self): + # The module's m_init func is run again. + self.maxDiff = None + + # Keep a reference around. + basic = self.load(self.NAME) + + for name in [ + f'{self.NAME}_with_reinit', # m_size == 0 + f'{self.NAME}_with_state', # m_size > 0 + ]: + self.add_module_cleanup(name) + with self.subTest(name): + loaded = self.load(name) + reloaded = self.re_load(name, loaded.module) + + self.check_common(loaded) + self.check_common(reloaded) + + # Make sure the original __dict__ did not get replaced. + self.assertEqual(id(loaded.module.__dict__), + loaded.snapshot.ns_id) + self.assertEqual(loaded.snapshot.ns.__dict__, + loaded.module.__dict__) + + self.assertEqual(reloaded.module.__spec__.name, reloaded.name) + self.assertEqual(reloaded.module.__name__, + reloaded.snapshot.ns.__name__) + + self.assertIsNot(reloaded.module, loaded.module) + self.assertNotEqual(reloaded.module.__dict__, + loaded.module.__dict__) + self.assertIs(reloaded.snapshot.lookedup, reloaded.module) + if loaded.snapshot.state_initialized is None: + self.assertIs(reloaded.snapshot.state_initialized, None) + else: + self.assertGreater(reloaded.snapshot.state_initialized, + loaded.snapshot.state_initialized) + + self.assertIs(reloaded.snapshot.cached, reloaded.module) + + # Currently, for every single-phrase init module loaded + # in multiple interpreters, those interpreters share a + # PyModuleDef for that object, which can be a problem. + # Also, we test with a single-phase module that has global state, + # which is shared by all interpreters. + + @requires_subinterpreters + def test_basic_multiple_interpreters_main_no_reset(self): + # without resetting; already loaded in main interpreter + + # At this point: + # * alive in 0 interpreters + # * module def may or may not be loaded already + # * module def not in _PyRuntime.imports.extensions + # * mod init func has not run yet (since reset, at least) + # * m_copy not set (hasn't been loaded yet or already cleared) + # * module's global state has not been initialized yet + # (or already cleared) + + main_loaded = self.load(self.NAME) + _testsinglephase = main_loaded.module + # Attrs set after loading are not in m_copy. + _testsinglephase.spam = 'spam, spam, spam, spam, eggs, and spam' + + self.check_common(main_loaded) + self.check_fresh(main_loaded) + + interpid1 = self.add_subinterpreter() + interpid2 = self.add_subinterpreter() + + # At this point: + # * alive in 1 interpreter (main) + # * module def in _PyRuntime.imports.extensions + # * mod init func ran for the first time (since reset, at least) + # * m_copy was copied from the main interpreter (was NULL) + # * module's global state was initialized + + # Use an interpreter that gets destroyed right away. + loaded = self.import_in_subinterp() + self.check_common(loaded) + self.check_copied(loaded, main_loaded) + + # At this point: + # * alive in 1 interpreter (main) + # * module def still in _PyRuntime.imports.extensions + # * mod init func ran again + # * m_copy is NULL (claered when the interpreter was destroyed) + # (was from main interpreter) + # * module's global state was updated, not reset + + # Use a subinterpreter that sticks around. + loaded = self.import_in_subinterp(interpid1) + self.check_common(loaded) + self.check_copied(loaded, main_loaded) + + # At this point: + # * alive in 2 interpreters (main, interp1) + # * module def still in _PyRuntime.imports.extensions + # * mod init func ran again + # * m_copy was copied from interp1 + # * module's global state was updated, not reset + + # Use a subinterpreter while the previous one is still alive. + loaded = self.import_in_subinterp(interpid2) + self.check_common(loaded) + self.check_copied(loaded, main_loaded) + + # At this point: + # * alive in 3 interpreters (main, interp1, interp2) + # * module def still in _PyRuntime.imports.extensions + # * mod init func ran again + # * m_copy was copied from interp2 (was from interp1) + # * module's global state was updated, not reset + + @requires_subinterpreters + def test_basic_multiple_interpreters_deleted_no_reset(self): + # without resetting; already loaded in a deleted interpreter + + # At this point: + # * alive in 0 interpreters + # * module def may or may not be loaded already + # * module def not in _PyRuntime.imports.extensions + # * mod init func has not run yet (since reset, at least) + # * m_copy not set (hasn't been loaded yet or already cleared) + # * module's global state has not been initialized yet + # (or already cleared) + + interpid1 = self.add_subinterpreter() + interpid2 = self.add_subinterpreter() + + # First, load in the main interpreter but then completely clear it. + loaded_main = self.load(self.NAME) + loaded_main.module._clear_globals() + _testinternalcapi.clear_extension(self.NAME, self.FILE) + + # At this point: + # * alive in 0 interpreters + # * module def loaded already + # * module def was in _PyRuntime.imports.extensions, but cleared + # * mod init func ran for the first time (since reset, at least) + # * m_copy was set, but cleared (was NULL) + # * module's global state was initialized but cleared + + # Start with an interpreter that gets destroyed right away. + base = self.import_in_subinterp(postscript=''' + # Attrs set after loading are not in m_copy. + mod.spam = 'spam, spam, mash, spam, eggs, and spam' + ''') + self.check_common(base) + self.check_fresh(base) + + # At this point: + # * alive in 0 interpreters + # * module def in _PyRuntime.imports.extensions + # * mod init func ran again + # * m_copy is NULL (claered when the interpreter was destroyed) + # * module's global state was initialized, not reset + + # Use a subinterpreter that sticks around. + loaded_interp1 = self.import_in_subinterp(interpid1) + self.check_common(loaded_interp1) + self.check_semi_fresh(loaded_interp1, loaded_main, base) + + # At this point: + # * alive in 1 interpreter (interp1) + # * module def still in _PyRuntime.imports.extensions + # * mod init func ran again + # * m_copy was copied from interp1 (was NULL) + # * module's global state was updated, not reset + + # Use a subinterpreter while the previous one is still alive. + loaded_interp2 = self.import_in_subinterp(interpid2) + self.check_common(loaded_interp2) + self.check_copied(loaded_interp2, loaded_interp1) + + # At this point: + # * alive in 2 interpreters (interp1, interp2) + # * module def still in _PyRuntime.imports.extensions + # * mod init func ran again + # * m_copy was copied from interp2 (was from interp1) + # * module's global state was updated, not reset + + @requires_subinterpreters + @requires_load_dynamic + def test_basic_multiple_interpreters_reset_each(self): + # resetting between each interpreter + + # At this point: + # * alive in 0 interpreters + # * module def may or may not be loaded already + # * module def not in _PyRuntime.imports.extensions + # * mod init func has not run yet (since reset, at least) + # * m_copy not set (hasn't been loaded yet or already cleared) + # * module's global state has not been initialized yet + # (or already cleared) + + interpid1 = self.add_subinterpreter() + interpid2 = self.add_subinterpreter() + + # Use an interpreter that gets destroyed right away. + loaded = self.import_in_subinterp( + postscript=''' + # Attrs set after loading are not in m_copy. + mod.spam = 'spam, spam, mash, spam, eggs, and spam' + ''', + postcleanup=True, + ) + self.check_common(loaded) + self.check_fresh(loaded) + + # At this point: + # * alive in 0 interpreters + # * module def in _PyRuntime.imports.extensions + # * mod init func ran for the first time (since reset, at least) + # * m_copy is NULL (claered when the interpreter was destroyed) + # * module's global state was initialized, not reset + + # Use a subinterpreter that sticks around. + loaded = self.import_in_subinterp(interpid1, postcleanup=True) + self.check_common(loaded) + self.check_fresh(loaded) + + # At this point: + # * alive in 1 interpreter (interp1) + # * module def still in _PyRuntime.imports.extensions + # * mod init func ran again + # * m_copy was copied from interp1 (was NULL) + # * module's global state was initialized, not reset + + # Use a subinterpreter while the previous one is still alive. + loaded = self.import_in_subinterp(interpid2, postcleanup=True) + self.check_common(loaded) + self.check_fresh(loaded) + + # At this point: + # * alive in 2 interpreters (interp2, interp2) + # * module def still in _PyRuntime.imports.extensions + # * mod init func ran again + # * m_copy was copied from interp2 (was from interp1) + # * module's global state was initialized, not reset + + class ReloadTests(unittest.TestCase): """Very basic tests to make sure that imp.reload() operates just like diff --git a/Modules/_testsinglephase.c b/Modules/_testsinglephase.c index 565221c887e..a16157702ae 100644 --- a/Modules/_testsinglephase.c +++ b/Modules/_testsinglephase.c @@ -140,7 +140,7 @@ init_module(PyObject *module, module_state *state) if (initialized == NULL) { return -1; } - if (PyModule_AddObjectRef(module, "_initialized", initialized) != 0) { + if (PyModule_AddObjectRef(module, "_module_initialized", initialized) != 0) { return -1; } @@ -148,13 +148,13 @@ init_module(PyObject *module, module_state *state) } -PyDoc_STRVAR(common_initialized_doc, -"initialized()\n\ +PyDoc_STRVAR(common_state_initialized_doc, +"state_initialized()\n\ \n\ -Return the seconds-since-epoch when the module was initialized."); +Return the seconds-since-epoch when the module state was initialized."); static PyObject * -common_initialized(PyObject *self, PyObject *Py_UNUSED(ignored)) +common_state_initialized(PyObject *self, PyObject *Py_UNUSED(ignored)) { module_state *state = get_module_state(self); if (state == NULL) { @@ -164,9 +164,9 @@ common_initialized(PyObject *self, PyObject *Py_UNUSED(ignored)) return PyFloat_FromDouble(d); } -#define INITIALIZED_METHODDEF \ - {"initialized", common_initialized, METH_NOARGS, \ - common_initialized_doc} +#define STATE_INITIALIZED_METHODDEF \ + {"state_initialized", common_state_initialized, METH_NOARGS, \ + common_state_initialized_doc} PyDoc_STRVAR(common_look_up_self_doc, @@ -265,7 +265,7 @@ basic__clear_globals(PyObject *self, PyObject *Py_UNUSED(ignored)) static PyMethodDef TestMethods_Basic[] = { LOOK_UP_SELF_METHODDEF, SUM_METHODDEF, - INITIALIZED_METHODDEF, + STATE_INITIALIZED_METHODDEF, INITIALIZED_COUNT_METHODDEF, _CLEAR_GLOBALS_METHODDEF, {NULL, NULL} /* sentinel */ @@ -360,7 +360,7 @@ PyInit__testsinglephase_basic_copy(void) static PyMethodDef TestMethods_Reinit[] = { LOOK_UP_SELF_METHODDEF, SUM_METHODDEF, - INITIALIZED_METHODDEF, + STATE_INITIALIZED_METHODDEF, {NULL, NULL} /* sentinel */ }; @@ -421,7 +421,7 @@ finally: static PyMethodDef TestMethods_WithState[] = { LOOK_UP_SELF_METHODDEF, SUM_METHODDEF, - INITIALIZED_METHODDEF, + STATE_INITIALIZED_METHODDEF, {NULL, NULL} /* sentinel */ }; diff --git a/Python/import.c b/Python/import.c index fabf03b1c5d..57d4eea1488 100644 --- a/Python/import.c +++ b/Python/import.c @@ -465,7 +465,7 @@ _modules_by_index_set(PyInterpreterState *interp, } static int -_modules_by_index_clear(PyInterpreterState *interp, PyModuleDef *def) +_modules_by_index_clear_one(PyInterpreterState *interp, PyModuleDef *def) { Py_ssize_t index = def->m_base.m_index; const char *err = _modules_by_index_check(interp, index); @@ -546,7 +546,7 @@ PyState_RemoveModule(PyModuleDef* def) "PyState_RemoveModule called on module with slots"); return -1; } - return _modules_by_index_clear(tstate->interp, def); + return _modules_by_index_clear_one(tstate->interp, def); } @@ -584,6 +584,109 @@ _PyImport_ClearModulesByIndex(PyInterpreterState *interp) /* extension modules */ /*********************/ +/* + It may help to have a big picture view of what happens + when an extension is loaded. This includes when it is imported + for the first time or via imp.load_dynamic(). + + Here's a summary, using imp.load_dynamic() as the starting point: + + 1. imp.load_dynamic() -> importlib._bootstrap._load() + 2. _load(): acquire import lock + 3. _load() -> importlib._bootstrap._load_unlocked() + 4. _load_unlocked() -> importlib._bootstrap.module_from_spec() + 5. module_from_spec() -> ExtensionFileLoader.create_module() + 6. create_module() -> _imp.create_dynamic() + (see below) + 7. module_from_spec() -> importlib._bootstrap._init_module_attrs() + 8. _load_unlocked(): sys.modules[name] = module + 9. _load_unlocked() -> ExtensionFileLoader.exec_module() + 10. exec_module() -> _imp.exec_dynamic() + (see below) + 11. _load(): release import lock + + + ...for single-phase init modules, where m_size == -1: + + (6). first time (not found in _PyRuntime.imports.extensions): + 1. _imp_create_dynamic_impl() -> import_find_extension() + 2. _imp_create_dynamic_impl() -> _PyImport_LoadDynamicModuleWithSpec() + 3. _PyImport_LoadDynamicModuleWithSpec(): load + 4. _PyImport_LoadDynamicModuleWithSpec(): call + 5. -> PyModule_Create() -> PyModule_Create2() -> PyModule_CreateInitialized() + 6. PyModule_CreateInitialized() -> PyModule_New() + 7. PyModule_CreateInitialized(): allocate mod->md_state + 8. PyModule_CreateInitialized() -> PyModule_AddFunctions() + 9. PyModule_CreateInitialized() -> PyModule_SetDocString() + 10. PyModule_CreateInitialized(): set mod->md_def + 11. : initialize the module + 12. _PyImport_LoadDynamicModuleWithSpec() -> _PyImport_CheckSubinterpIncompatibleExtensionAllowed() + 13. _PyImport_LoadDynamicModuleWithSpec(): set def->m_base.m_init + 14. _PyImport_LoadDynamicModuleWithSpec(): set __file__ + 15. _PyImport_LoadDynamicModuleWithSpec() -> _PyImport_FixupExtensionObject() + 16. _PyImport_FixupExtensionObject(): add it to interp->imports.modules_by_index + 17. _PyImport_FixupExtensionObject(): copy __dict__ into def->m_base.m_copy + 18. _PyImport_FixupExtensionObject(): add it to _PyRuntime.imports.extensions + + (6). subsequent times (found in _PyRuntime.imports.extensions): + 1. _imp_create_dynamic_impl() -> import_find_extension() + 2. import_find_extension() -> import_add_module() + 3. if name in sys.modules: use that module + 4. else: + 1. import_add_module() -> PyModule_NewObject() + 2. import_add_module(): set it on sys.modules + 5. import_find_extension(): copy the "m_copy" dict into __dict__ + 6. _imp_create_dynamic_impl() -> _PyImport_CheckSubinterpIncompatibleExtensionAllowed() + + (10). (every time): + 1. noop + + + ...for single-phase init modules, where m_size >= 0: + + (6). not main interpreter and never loaded there - every time (not found in _PyRuntime.imports.extensions): + 1-16. (same as for m_size == -1) + + (6). main interpreter - first time (not found in _PyRuntime.imports.extensions): + 1-16. (same as for m_size == -1) + 17. _PyImport_FixupExtensionObject(): add it to _PyRuntime.imports.extensions + + (6). previously loaded in main interpreter (found in _PyRuntime.imports.extensions): + 1. _imp_create_dynamic_impl() -> import_find_extension() + 2. import_find_extension(): call def->m_base.m_init + 3. import_find_extension(): add the module to sys.modules + + (10). every time: + 1. noop + + + ...for multi-phase init modules: + + (6). every time: + 1. _imp_create_dynamic_impl() -> import_find_extension() (not found) + 2. _imp_create_dynamic_impl() -> _PyImport_LoadDynamicModuleWithSpec() + 3. _PyImport_LoadDynamicModuleWithSpec(): load module init func + 4. _PyImport_LoadDynamicModuleWithSpec(): call module init func + 5. _PyImport_LoadDynamicModuleWithSpec() -> PyModule_FromDefAndSpec() + 6. PyModule_FromDefAndSpec(): gather/check moduledef slots + 7. if there's a Py_mod_create slot: + 1. PyModule_FromDefAndSpec(): call its function + 8. else: + 1. PyModule_FromDefAndSpec() -> PyModule_NewObject() + 9: PyModule_FromDefAndSpec(): set mod->md_def + 10. PyModule_FromDefAndSpec() -> _add_methods_to_object() + 11. PyModule_FromDefAndSpec() -> PyModule_SetDocString() + + (10). every time: + 1. _imp_exec_dynamic_impl() -> exec_builtin_or_dynamic() + 2. if mod->md_state == NULL (including if m_size == 0): + 1. exec_builtin_or_dynamic() -> PyModule_ExecDef() + 2. PyModule_ExecDef(): allocate mod->md_state + 3. if there's a Py_mod_exec slot: + 1. PyModule_ExecDef(): call its function + */ + + /* Make sure name is fully qualified. This is a bit of a hack: when the shared library is loaded, @@ -1007,13 +1110,17 @@ clear_singlephase_extension(PyInterpreterState *interp, /* Clear the PyState_*Module() cache entry. */ if (_modules_by_index_check(interp, def->m_base.m_index) == NULL) { - if (_modules_by_index_clear(interp, def) < 0) { + if (_modules_by_index_clear_one(interp, def) < 0) { return -1; } } /* Clear the cached module def. */ - return _extensions_cache_delete(filename, name); + if (_extensions_cache_delete(filename, name) < 0) { + return -1; + } + + return 0; }