Issue #17621: Introduce importlib.util.LazyLoader.

This commit is contained in:
Brett Cannon 2014-04-04 13:53:38 -04:00
parent f22b2f0cf4
commit a04dbe4fe7
5 changed files with 266 additions and 1 deletions

View File

@ -1191,3 +1191,38 @@ an :term:`importer`.
module will be file-based.
.. versionadded:: 3.4
.. class:: LazyLoader(loader)
A class which postpones the execution of the loader of a module until the
module has an attribute accessed.
This class **only** works with loaders that define
:meth:`importlib.abc.Loader.exec_module`, as control over what module type
is used for the module is required. For the same reason, the loader
**cannot** define :meth:`importlib.abc.Loader.create_module`. Finally,
modules which substitute the object placed into :attr:`sys.modules` will
not work as there is no way to properly replace the module references
throughout the interpreter safely; :exc:`ValueError` is raised if such a
substitution is detected.
.. note::
For projects where startup time is critical, this class allows for
potentially minimizing the cost of loading a module if it is never used.
For projects where startup time is not essential, use of this class is
**heavily** discouraged because error messages created during loading are
postponed and thus occur out of context.
.. versionadded:: 3.5
.. classmethod:: factory(loader)
A class method which returns a callable that creates a lazy loader. This
is meant to be used in situations where the loader is passed by class
instead of by instance.
::
suffixes = importlib.machinery.SOURCE_SUFFIXES
loader = importlib.machinery.SourceFileLoader
lazy_loader = importlib.util.LazyLoader.factory(loader)
finder = importlib.machinery.FileFinder(path, [(lazy_loader, suffixes)])

View File

@ -149,6 +149,10 @@ Improved Modules
subclassing of :class:`~inspect.Signature` easier (contributed
by Yury Selivanov and Eric Snow in :issue:`17373`).
* :class:`importlib.util.LazyLoader` allows for the lazy loading of modules in
applications where startup time is paramount (contributed by Brett Cannon in
:issue:`17621`).
Optimizations
=============

View File

@ -1,5 +1,5 @@
"""Utility code for constructing importers, etc."""
from . import abc
from ._bootstrap import MAGIC_NUMBER
from ._bootstrap import cache_from_source
from ._bootstrap import decode_source
@ -12,6 +12,7 @@ from ._bootstrap import _find_spec
from contextlib import contextmanager
import functools
import sys
import types
import warnings
@ -200,3 +201,94 @@ def module_for_loader(fxn):
return fxn(self, module, *args, **kwargs)
return module_for_loader_wrapper
class _Module(types.ModuleType):

    """Module subclass that exists so a module's ``__class__`` can be swapped."""
class _LazyModule(types.ModuleType):

    """A subclass of the module type which triggers loading upon attribute access.

    The first attribute read (or deletion) switches the instance over to
    ``_Module`` and runs ``__spec__.loader.exec_module()`` on it.
    """

    def __getattribute__(self, attr):
        """Trigger the load of the module and return the attribute.

        Raises ValueError when, after the load, sys.modules holds a different
        object under the module's original name.
        """
        # All module metadata must be garnered from __spec__ in order to avoid
        # using mutated values.
        # Stop triggering this method.
        self.__class__ = _Module
        # Get the original name to make sure no object substitution occurred
        # in sys.modules.
        original_name = self.__spec__.name
        # Figure out exactly what attributes were mutated between the creation
        # of the module and now.
        attrs_then = self.__spec__.loader_state
        attrs_now = self.__dict__
        # Code that set the attribute may have kept a reference to the
        # assigned object, making identity more important than equality.
        attrs_updated = {
            key: value
            for key, value in attrs_now.items()
            if key not in attrs_then or value is not attrs_then[key]
        }
        self.__spec__.loader.exec_module(self)
        # If exec_module() was used directly there is no guarantee the module
        # object was put into sys.modules, so a missing entry is not an error;
        # defaulting to ``self`` makes the identity check pass in that case.
        if sys.modules.get(original_name, self) is not self:
            msg = ('module object for {!r} substituted in sys.modules '
                   'during a lazy load')
            raise ValueError(msg.format(original_name))
        # Update after loading since that's what would happen in an eager
        # loading situation.
        self.__dict__.update(attrs_updated)
        return getattr(self, attr)

    def __delattr__(self, attr):
        """Trigger the load and then perform the deletion."""
        # To trigger the load and raise an exception if the attribute
        # doesn't exist.
        self.__getattribute__(attr)
        delattr(self, attr)
class LazyLoader(abc.Loader):

    """A loader that creates a module which defers loading until attribute access.

    The wrapped (eager) loader is kept on ``self.loader``; it must provide
    exec_module() and must not override create_module().
    """

    @staticmethod
    def __check_eager_loader(loader):
        """Validate *loader*, given either as an instance or as a class.

        Raises TypeError if exec_module() is missing or if create_module() is
        overridden relative to importlib.abc.Loader.
        """
        if not hasattr(loader, 'exec_module'):
            raise TypeError('loader must define exec_module()')
        # factory() hands over the loader class itself; looking at
        # ``loader.__class__`` would then inspect the metaclass and silently
        # skip the create_module() check, so inspect the class directly.
        loader_cls = loader if isinstance(loader, type) else loader.__class__
        if hasattr(loader_cls, 'create_module'):
            if abc.Loader.create_module != loader_cls.create_module:
                # Only care if create_module() is overridden in a subclass of
                # importlib.abc.Loader.
                raise TypeError('loader cannot define create_module()')

    @classmethod
    def factory(cls, loader):
        """Construct a callable which returns the eager loader made lazy.

        *loader* is a loader class (or other callable producing a loader); it
        is validated immediately, and every loader it produces is validated
        again when wrapped.
        """
        cls.__check_eager_loader(loader)
        return lambda *args, **kwargs: cls(loader(*args, **kwargs))

    def __init__(self, loader):
        """Wrap the eager *loader*; raises TypeError if it is unsuitable."""
        self.__check_eager_loader(loader)
        self.loader = loader

    def create_module(self, spec):
        """Create a module which can have its __class__ manipulated."""
        return _Module(spec.name)

    def exec_module(self, module):
        """Make the module load lazily."""
        # From here on the module advertises the eager loader, which is the
        # one invoked when the deferred load finally runs.
        module.__spec__.loader = self.loader
        module.__loader__ = self.loader
        # Don't need to worry about deep-copying as trying to set an attribute
        # on an object would have triggered the load,
        # e.g. ``module.__spec__.loader = None`` would trigger a load from
        # trying to access module.__spec__.
        module.__spec__.loader_state = module.__dict__.copy()
        module.__class__ = _LazyModule

View File

@ -0,0 +1,132 @@
import importlib
from importlib import abc
from importlib import util
import unittest
from . import util as test_util
class CollectInit:

    """Stand-in loader that records the arguments it was constructed with."""

    def __init__(self, *args, **kwargs):
        # Keep positional and keyword arguments for later inspection.
        self.args, self.kwargs = args, kwargs

    def exec_module(self, module):
        """Ignore *module* and hand back this instance."""
        return self
class LazyLoaderFactoryTests(unittest.TestCase):

    """Checks for util.LazyLoader.factory()."""

    def test_init(self):
        make_lazy = util.LazyLoader.factory(CollectInit)
        # E.g. what importlib.machinery.FileFinder instantiates loaders with
        # plus keyword arguments.
        wrapped = make_lazy('module name', 'module path', kw='kw').loader
        self.assertEqual(('module name', 'module path'), wrapped.args)
        self.assertEqual({'kw': 'kw'}, wrapped.kwargs)

    def test_validation(self):
        # No exec_module(), no lazy loading.
        self.assertRaises(TypeError, util.LazyLoader.factory, object)
class TestingImporter(abc.MetaPathFinder, abc.Loader):

    """Finder and loader in one, serving a single module from a source string."""

    module_name = 'lazy_loader_test'
    mutated_name = 'changed'
    # Set to the module object once exec_module() has run.
    loaded = None
    # The executed source rebinds __name__ so tests can observe the mutation.
    source_code = 'attr = 42; __name__ = {!r}'.format(mutated_name)

    def find_spec(self, name, path, target=None):
        """Return a spec with a lazy loader for module_name, else None."""
        if name == self.module_name:
            return util.spec_from_loader(name, util.LazyLoader(self))
        return None

    def exec_module(self, module):
        """Run source_code in the module's namespace and remember the module."""
        namespace = module.__dict__
        exec(self.source_code, namespace)
        self.loaded = module
class LazyLoaderTests(unittest.TestCase):

    """Checks for modules made lazy through util.LazyLoader."""

    def test_init(self):
        # A loader without exec_module() is rejected.
        self.assertRaises(TypeError, util.LazyLoader, object)

    def new_module(self, source_code=None):
        """Build a lazy, not-yet-loaded module backed by a TestingImporter.

        When *source_code* is given it replaces the importer's default source.
        """
        eager = TestingImporter()
        if source_code is not None:
            eager.source_code = source_code
        lazy = util.LazyLoader(eager)
        spec = util.spec_from_loader(TestingImporter.module_name, lazy)
        module = lazy.create_module(spec)
        module.__spec__ = spec
        module.__loader__ = lazy
        lazy.exec_module(module)
        # Module is now lazy.
        self.assertIsNone(eager.loaded)
        return module

    def test_e2e(self):
        # End-to-end test to verify the load is in fact lazy.
        importer = TestingImporter()
        assert importer.loaded is None
        name = importer.module_name
        with test_util.uncache(name), \
                test_util.import_state(meta_path=[importer]):
            module = importlib.import_module(name)
        self.assertIsNone(importer.loaded)
        # Trigger load.
        self.assertEqual(module.__loader__, importer)
        self.assertIsNotNone(importer.loaded)
        self.assertEqual(module, importer.loaded)

    def test_attr_unchanged(self):
        # An attribute only mutated as a side-effect of import should not be
        # changed needlessly.
        lazy_module = self.new_module()
        self.assertEqual(TestingImporter.mutated_name, lazy_module.__name__)

    def test_new_attr(self):
        # A new attribute should persist.
        lazy_module = self.new_module()
        lazy_module.new_attr = 42
        self.assertEqual(42, lazy_module.new_attr)

    def test_mutated_preexisting_attr(self):
        # Changing an attribute that already existed on the module --
        # e.g. __name__ -- should persist.
        lazy_module = self.new_module()
        lazy_module.__name__ = 'bogus'
        self.assertEqual('bogus', lazy_module.__name__)

    def test_mutated_attr(self):
        # Changing an attribute that comes into existence after an import
        # should persist.
        lazy_module = self.new_module()
        lazy_module.attr = 6
        self.assertEqual(6, lazy_module.attr)

    def test_delete_eventual_attr(self):
        # Deleting an attribute should stay deleted.
        lazy_module = self.new_module()
        del lazy_module.attr
        self.assertFalse(hasattr(lazy_module, 'attr'))

    def test_delete_preexisting_attr(self):
        # Same as above, for an attribute present before the load.
        lazy_module = self.new_module()
        del lazy_module.__name__
        self.assertFalse(hasattr(lazy_module, '__name__'))

    def test_module_substitution_error(self):
        # Source that swaps the sys.modules entry must make the load fail.
        swapping_source = 'import sys; sys.modules[__name__] = 42'
        lazy_module = self.new_module(swapping_source)
        with test_util.uncache(TestingImporter.module_name):
            with self.assertRaises(ValueError):
                lazy_module.__name__
# Run the tests in this module when it is executed directly as a script.
if __name__ == '__main__':
    unittest.main()

View File

@ -29,6 +29,8 @@ Core and Builtins
Library
-------
- Issue #17621: Introduce importlib.util.LazyLoader.
- Issue #21076: signal module constants were turned into enums.
Patch by Giampaolo Rodola'.