Rewrite the code implementing __import__ for importlib. Now it is much simpler

and relies much more on meta path finders to abstract out various parts of
import.

As part of this, the semantics for import_module were tightened up and now follow
__import__ much more closely (the biggest change is that the package named by the
'package' argument must now already be imported, else a SystemError is raised).
This commit is contained in:
Brett Cannon 2009-02-07 01:15:27 +00:00
parent 887b3f2625
commit 2c318a1390
9 changed files with 117 additions and 502 deletions

View File

@ -72,11 +72,15 @@ Functions
import in absolute or relative terms
(e.g. either ``pkg.mod`` or ``..mod``). If the name is
specified in relative terms, then the *package* argument must be
specified to the package which is to act as the anchor for resolving the
set to the package which is to act as the anchor for resolving the
package name (e.g. ``import_module('..mod', 'pkg.subpkg')`` will import
``pkg.mod``). The specified module will be inserted into
:data:`sys.modules` and returned.
``pkg.mod``).
The :func:`import_module` function acts as a simplifying wrapper around
:func:`__import__`. This means all semantics of the function are derived
from :func:`__import__`, including requiring the package where an import is
occurring from to already be imported (i.e., *package* must already be
imported).
:mod:`importlib.machinery` -- Importers and path hooks
------------------------------------------------------

View File

@ -1,35 +1,10 @@
to do
/////
* Create sandbox directory for a distutils packaging of what is in Python 2.7.
* Use rpartition for getting the package of a module.
+ Make sure that an empty string is acceptable for __package__.
* Create meta_path importer for sys.path.
+ Document.
* Refactor __import__.
+ Create a greatest common denominator function for __import__/import_module
that takes in an absolute module name and performs the import.
- Needs of __import__
* Figure out caller's package.
* Import module.
* Set __package__.
* Figure out what module to return.
- Needs of import_module
* Resolve name/level.
* Import module.
+ Use GCD import for __import__.
+ Use GCD import for import_module.
+ Make sure there is a test for the empty string as acceptable for
__package__.
* Implement PEP 302 protocol for loaders (should just be a matter of testing).
@ -66,13 +41,11 @@ to do
* source_path
* bytecode_path
* write_bytecode
* write_bytecode (not abstract)
+ util
- get_module decorator (new name)
- check_name decorator (new name)
- resolve_name
- get_module decorator (rename: module_for_loader)
+ machinery
@ -88,6 +61,8 @@ to do
* SourceFinder
* (?) Loader
- PathFinder
* Write benchmark suite.
* OPTIMIZE!

View File

@ -29,7 +29,7 @@ def _set__import__():
"""Set __import__ to an instance of Import."""
global original__import__
original__import__ = __import__
__builtins__['__import__'] = Import()
__builtins__['__import__'] = _bootstrap._import
def _reset__import__():
@ -114,7 +114,7 @@ marshal._r_long = _r_long
# Public API #########################################################
__import__ = _bootstrap.Import().__call__
__import__ = _bootstrap._import
def import_module(name, package=None):
@ -125,17 +125,15 @@ def import_module(name, package=None):
relative import to an absolute import.
"""
level = 0
if name.startswith('.'):
if not package:
raise TypeError("relative imports require the 'package' argument")
level = 0
for character in name:
if character != '.':
break
level += 1
name = Import._resolve_name(name[level:], package, level)
__import__(name)
return sys.modules[name]
return _bootstrap._gcd_import(name[level:], package, level)
# XXX This should go away once the public API is done.

View File

@ -681,22 +681,31 @@ def _gcd_import(name, package=None, level=0):
being made from, and the level adjustment.
This function represents the greatest common denominator of functionality
between import_module and __import__.
between import_module and __import__. This includes setting __package__ if
the loader did not.
"""
if package and package not in sys.modules:
msg = "Parent module {0!r} not loaded, cannot perform relative import"
raise SystemError(msg.format(package))
dot = len(package)
if package:
if not hasattr(package, 'rindex'):
raise ValueError("__package__ not set to a string")
elif package not in sys.modules:
msg = ("Parent module {0!r} not loaded, cannot perform relative "
"import")
raise SystemError(msg.format(package))
if not name and level == 0:
raise ValueError("Empty module name")
if level > 0:
dot = len(package)
for x in range(level, 1, -1):
try:
dot = package.rindex('.', 0, dot)
except AttributeError:
raise ValueError("__package__ not set to a string")
except ValueError:
raise ValueError("attempted relative import beyond top-level "
"package")
name = "{0}.{1}".format(package[:dot], name)
raise ValueError("attempted relative import beyond "
"top-level package")
if name:
name = "{0}.{1}".format(package[:dot], name)
else:
name = package[:dot]
with ImportLockContext():
try:
return sys.modules[name]
@ -706,319 +715,83 @@ def _gcd_import(name, package=None, level=0):
path = None
if parent:
if parent not in sys.modules:
parent_module = _gcd_import(parent)
else:
parent_module = sys.modules[parent]
_gcd_import(parent)
# Backwards-compatibility; be nicer to skip the dict lookup.
parent_module = sys.modules[parent]
path = parent_module.__path__
for finder in sys.meta_path + [PathFinder]:
meta_path = (sys.meta_path +
[BuiltinImporter, FrozenImporter, PathFinder])
for finder in meta_path:
loader = finder.find_module(name, path)
if loader: # XXX Worth checking for None explicitly?
return loader.load_module(name)
if loader is not None:
loader.load_module(name)
break
else:
raise ImportError("No module named {0}".format(name))
class Import(object):
"""Class that implements the __import__ interface.
Backwards compatibility is maintained by extending sys.meta_path
internally (for handling built-in and frozen modules) and providing a
default path hooks entry for extension modules, .py, and .pyc
files. Both are controlled during instance initialization.
"""
def __init__(self, default_path_hook=None,
extended_meta_path=None):
"""Store a default path hook entry and a sequence to internally extend
sys.meta_path by (passing in None uses default importers)."""
if extended_meta_path is None:
self.extended_meta_path = BuiltinImporter, FrozenImporter
else:
self.extended_meta_path = extended_meta_path
self.default_path_hook = default_path_hook
if self.default_path_hook is None:
# Create a handler to deal with extension modules, .py, and .pyc
# files. Built-in and frozen modules are handled by sys.meta_path
# entries.
importers = [ExtensionFileImporter, PyFileImporter]
self.default_path_hook = chaining_fs_path_hook(*importers)
def _search_meta_path(self, name, path=None):
"""Check the importers on sys.meta_path for a loader along with the
extended meta path sequence stored within this instance.
The extended sys.meta_path entries are searched after the entries on
sys.meta_path.
"""
for entry in (tuple(sys.meta_path) + self.extended_meta_path):
loader = entry.find_module(name, path)
if loader:
return loader
else:
raise ImportError("No module named %s" % name)
def _sys_path_importer(self, path_entry):
"""Return the importer for the specified path, from
sys.path_importer_cache if possible.
If None is stored in sys.path_importer_cache then use the default path
hook.
"""
try:
# See if an importer is cached.
importer = sys.path_importer_cache[path_entry]
# If None was returned, use default importer factory.
if importer is None:
return self.default_path_hook(path_entry)
else:
return importer
except KeyError:
# No cached importer found; try to get a new one from
# sys.path_hooks or imp.NullImporter.
for importer_factory in (sys.path_hooks + [imp.NullImporter]):
try:
importer = importer_factory(path_entry)
sys.path_importer_cache[path_entry] = importer
return importer
except ImportError:
continue
else:
# No importer factory on sys.path_hooks works; use the default
# importer factory and store None in sys.path_importer_cache.
try:
importer = self.default_path_hook(path_entry)
sys.path_importer_cache[path_entry] = None
return importer
except ImportError:
raise ImportError("no importer found for %s" % path_entry)
def _search_std_path(self, name, path=None):
"""Check sys.path or 'path' (depending if 'path' is set) for the
named module and return its loader."""
if path:
search_paths = path
else:
search_paths = sys.path
for entry in search_paths:
try:
importer = self._sys_path_importer(entry)
except ImportError:
continue
loader = importer.find_module(name)
if loader:
return loader
else:
raise ImportError("No module named %s" % name)
def module_from_cache(self, name):
"""Try to return the named module from sys.modules.
Return False if the module is not in the cache.
"""
if name in sys.modules:
return sys.modules[name]
else:
return False
def post_import(self, module):
"""Perform any desired post-import processing on the module."""
return module
def _import_module(self, name, path=None):
"""Import the specified module with no handling of parent modules.
If None is set for a value in sys.modules (to signify that a relative
import was attempted and failed) then ImportError is raised.
"""
cached_module = self.module_from_cache(name)
if cached_module is not False:
if cached_module is None:
raise ImportError("relative import redirect")
else:
return cached_module
try:
# Attempt to find a loader on sys.meta_path.
loader = self._search_meta_path(name, path)
except ImportError:
# sys.meta_path search failed. Attempt to find a loader on
# sys.path. If this fails then module cannot be found.
loader = self._search_std_path(name, path)
# A loader was found. It is the loader's responsibility to have put an
# entry in sys.modules.
module = self.post_import(loader.load_module(name))
# 'module' could be something like None.
if not hasattr(module, '__name__'):
return module
# Set __package__.
if not hasattr(module, '__package__') or module.__package__ is None:
if hasattr(module, '__path__'):
module.__package__ = module.__name__
elif '.' in module.__name__:
pkg_name = module.__name__.rsplit('.', 1)[0]
module.__package__ = pkg_name
else:
module.__package__ = None
return module
def _import_full_module(self, name):
"""Import a module and set it on its parent if needed."""
path_list = None
parent_name = name.rsplit('.', 1)[0]
parent = None
if parent_name != name:
parent = sys.modules[parent_name]
try:
path_list = parent.__path__
except AttributeError:
pass
self._import_module(name, path_list)
# Backwards-compatibility; be nicer to skip the dict lookup.
module = sys.modules[name]
if parent:
tail = name.rsplit('.', 1)[-1]
setattr(parent, tail, module)
# Set the module as an attribute on its parent.
setattr(parent_module, name.rpartition('.')[2], module)
# Set __package__ if the loader did not.
if not hasattr(module, '__package__') or module.__package__ is None:
# Watch out for what comes out of sys.modules to not be a module,
# e.g. an int.
try:
module.__package__ = module.__name__
if not hasattr(module, '__path__'):
module.__package__ = module.__package__.rpartition('.')[0]
except AttributeError:
pass
return module
def _find_package(self, name, has_path):
"""Return the package that the caller is in or None."""
if has_path:
return name
elif '.' in name:
return name.rsplit('.', 1)[0]
else:
return None
@staticmethod
def _resolve_name(name, package, level):
"""Return the absolute name of the module to be imported."""
level -= 1
def _import(name, globals={}, locals={}, fromlist=[], level=0):
"""Import a module.
The 'globals' argument is used to infer where the import is occurring from
to handle relative imports. The 'locals' argument is ignored. The
'fromlist' argument specifies what should exist as attributes on the module
being imported (e.g. ``from module import <fromlist>``). The 'level'
argument represents the package location to import from in a relative
import (e.g. ``from ..pkg import mod`` would have a 'level' of 2).
"""
if level == 0:
module = _gcd_import(name)
else:
# __package__ is not guaranteed to be defined.
try:
if package.count('.') < level:
raise ValueError("attempted relative import beyond top-level "
"package")
except AttributeError:
raise ValueError("__package__ not set to a string")
base = package.rsplit('.', level)[0]
if name:
return "{0}.{1}".format(base, name)
else:
return base
def _return_module(self, absolute_name, relative_name, fromlist):
"""Return the proper module based on what module was requested (and its
absolute module name), who is requesting it, and whether any specific
attributes were specified.
The semantics of this method revolve around 'fromlist'. When it is
empty, the module up to the first dot is to be returned. When the
module being requested is an absolute name this is simple (and
relative_name is an empty string). But if the requested module was
a relative import (as signaled by relative_name having a non-false
value), then the name up to the first dot in the relative name resolved
to an absolute name is to be returned.
When fromlist is not empty and the module being imported is a package,
then the values
in fromlist need to be checked for. If a value is not a pre-existing
attribute a relative import is attempted. If it fails then the failure
is suppressed silently.
"""
if not fromlist:
if relative_name:
absolute_base = absolute_name.rpartition(relative_name)[0]
relative_head = relative_name.split('.', 1)[0]
to_return = absolute_base + relative_head
else:
to_return = absolute_name.split('.', 1)[0]
return sys.modules[to_return]
# When fromlist is not empty, return the actual module specified in
# the import.
else:
module = sys.modules[absolute_name]
if hasattr(module, '__path__') and hasattr(module, '__name__'):
# When fromlist has a value and the imported module is a
# package, then if a name in fromlist is not found as an
# attribute on module, try a relative import to find it.
# Failure is fine and the exception is suppressed.
check_for = list(fromlist)
if '*' in check_for and hasattr(module, '__all__'):
check_for.extend(module.__all__)
for item in check_for:
if item == '*':
continue
if not hasattr(module, item):
resolved_name = self._resolve_name(item,
module.__name__, 1)
try:
self._import_full_module(resolved_name)
except ImportError:
pass
package = globals['__package__']
except KeyError:
package = globals['__name__']
if '__path__' not in globals:
package = package.rpartition('.')[0]
module = _gcd_import(name, package, level)
# The hell that is fromlist ...
if not fromlist:
# Return up to the first dot in 'name'. This is complicated by the fact
# that 'name' may be relative.
if level == 0:
return sys.modules[name.partition('.')[0]]
elif not name:
return module
else:
cut_off = len(name) - len(name.partition('.')[0])
return sys.modules[module.__name__[:-cut_off]]
else:
# If a package was imported, try to import stuff from fromlist.
if hasattr(module, '__path__'):
if '*' in fromlist and hasattr(module, '__all__'):
fromlist.remove('*')
fromlist.extend(module.__all__)
for x in (y for y in fromlist if not hasattr(module,y)):
try:
_gcd_import('{0}.{1}'.format(module.__name__, x))
except ImportError:
pass
return module
def __call__(self, name, globals={}, locals={}, fromlist=[], level=0):
"""Import a module.
The 'name' argument is the name of the module to be imported (e.g.,
'foo' in ``import foo`` or ``from foo import ...``).
'globals' and 'locals' are the global and local namespace dictionaries
of the module where the import statement appears. 'globals' is used to
introspect the __path__ and __name__ attributes of the module making
the call. 'locals' is ignored.
'fromlist' lists any specific objects that are to eventually be put
into the namespace (e.g., ``from foo.bar import baz`` would have 'baz'
in the fromlist, and this includes '*'). An entry of '*' will lead to
a check for __all__ being defined on the module. If it is defined then
the values in __all__ will be checked to make sure that all values are
attributes on the module, attempting a module import relative to 'name'
to set that attribute.
When 'name' is a dotted name, there are two different situations to
consider for the return value. One is when the fromlist is empty.
In this situation the import statement imports and returns the name up
to the first dot. All subsequent names are imported but set as
attributes as needed on parent modules. When fromlist is not empty
then the module represented by the full dotted name is returned.
'level' represents possible relative imports.
A value of 0 is for absolute module names. Any positive value
represents the number of dots listed in the relative import statement
(e.g. has a value of 2 for ``from .. import foo``).
"""
# TODO(brett.cannon) outdated check; just care that level >= 0
if not name and level < 1:
raise ValueError("Empty module name")
is_pkg = True if '__path__' in globals else False
caller_name = globals.get('__name__')
package = globals.get('__package__')
if caller_name and not package:
package = self._find_package(caller_name, '__path__' in globals)
if package and package not in sys.modules:
if not hasattr(package, 'rsplit'):
raise ValueError("__package__ not set to a string")
msg = ("Parent module {0!r} not loaded, "
"cannot perform relative import")
raise SystemError(msg.format(package))
with ImportLockContext():
if level:
imported_name = self._resolve_name(name, package, level)
else:
imported_name = name
parent_name = imported_name.rsplit('.', 1)[0]
if parent_name != imported_name and parent_name not in sys.modules:
self.__call__(parent_name, level=0)
# This call will also handle setting the attribute on the
# package.
self._import_full_module(imported_name)
relative_name = '' if imported_name == name else name
return self._return_module(imported_name, relative_name, fromlist)
# XXX Eventually replace with a proper __all__ value (i.e., don't expose os
# replacements but do expose _ExtensionFileLoader, etc. for testing).

View File

@ -38,8 +38,9 @@ class Using__package__(unittest.TestCase):
with util.mock_modules('pkg.__init__', 'pkg.fake') as importer:
with util.import_state(meta_path=[importer]):
import_util.import_('pkg.fake')
module = import_util.import_('', globals={'__package__': 'pkg.fake'},
fromlist=['attr'], level=2)
module = import_util.import_('',
globals={'__package__': 'pkg.fake'},
fromlist=['attr'], level=2)
self.assertEquals(module.__name__, 'pkg')
def test_using___name__(self):
@ -82,7 +83,7 @@ class Setting__package__(unittest.TestCase):
with util.import_state(meta_path=[mock]):
del mock['top_level'].__package__
module = import_util.import_('top_level')
self.assert_(module.__package__ is None)
self.assertEqual(module.__package__, '')
# [package]
def test_package(self):

View File

@ -64,7 +64,8 @@ class UseCache(unittest.TestCase):
with util.import_state(meta_path=[importer]):
module = import_util.import_('pkg', fromlist=['module'])
self.assert_(hasattr(module, 'module'))
self.assertEquals(id(module.module), id(sys.modules['pkg.module']))
self.assertEquals(id(module.module),
id(sys.modules['pkg.module']))
def test_main():

View File

@ -10,148 +10,6 @@ from types import MethodType
import unittest
class BaseTests(unittest.TestCase):
"""When sys.meta_path cannot find the desired module, sys.path is
consulted. For each entry on the sequence [order], sys.path_importer_cache
is checked to see if it contains a key for the entry [cache check]. If an
importer is found then it is consulted before trying the next entry in
sys.path [cache use]. The 'path' argument to find_module() is never used
when trying to find a module [path not used].
If an entry from sys.path is not in sys.path_importer_cache, sys.path_hooks
is called in turn [hooks order]. If a path hook cannot handle an entry,
ImportError is raised [hook failure]. Otherwise the resulting object is
cached in sys.path_importer_cache and then consulted [hook success]. If no
hook is found, None is set in sys.path_importer_cache and the default
importer is tried [no hook].
For use of __path__ in a package, the above is all true, just substitute
"__path__" for "sys.path".
"""
def order_test(self, to_import, entry, search_path, path=[]):
# [order]
log = []
class LogFindModule(util.mock_modules):
def find_module(self, fullname):
log.append(self)
return super().find_module(fullname)
assert len(search_path) == 2
misser = LogFindModule(search_path[0])
hitter = LogFindModule(to_import)
with nested(misser, hitter):
cache = dict(zip(search_path, (misser, hitter)))
with util.import_state(path=path, path_importer_cache=cache):
import_util.import_(to_import)
self.assertEquals(log[0], misser)
self.assertEquals(log[1], hitter)
@import_util.importlib_only # __import__ uses PyDict_GetItem(), bypassing log.
def cache_use_test(self, to_import, entry, path=[]):
# [cache check], [cache use]
log = []
class LoggingDict(dict):
def __getitem__(self, item):
log.append(item)
return super(LoggingDict, self).__getitem__(item)
with util.mock_modules(to_import) as importer:
cache = LoggingDict()
cache[entry] = importer
with util.import_state(path=[entry], path_importer_cache=cache):
module = import_util.import_(to_import, fromlist=['a'])
self.assert_(module is importer[to_import])
self.assertEquals(len(cache), 1)
self.assertEquals([entry], log)
def hooks_order_test(self, to_import, entry, path=[]):
# [hooks order], [hooks failure], [hook success]
log = []
def logging_hook(entry):
log.append(entry)
raise ImportError
with util.mock_modules(to_import) as importer:
hitter = import_util.mock_path_hook(entry, importer=importer)
path_hooks = [logging_hook, logging_hook, hitter]
with util.import_state(path_hooks=path_hooks, path=path):
import_util.import_(to_import)
self.assertEquals(sys.path_importer_cache[entry], importer)
self.assertEquals(len(log), 2)
# [no hook] XXX Worry about after deciding how to handle the default hook.
def path_argument_test(self, to_import):
# [path not used]
class BadImporter:
"""Class to help detect TypeError from calling find_module() with
an improper number of arguments."""
def find_module(name):
raise ImportError
try:
import_util.import_(to_import)
except ImportError:
pass
class PathTests(BaseTests):
"""Tests for sys.path."""
def test_order(self):
self.order_test('hit', 'second', ['first', 'second'],
['first', 'second'])
def test_cache_use(self):
entry = "found!"
self.cache_use_test('hit', entry, [entry])
def test_hooks_order(self):
entry = "found!"
self.hooks_order_test('hit', entry, [entry])
def test_path_argument(self):
name = 'total junk'
with util.uncache(name):
self.path_argument_test(name)
class __path__Tests(BaseTests):
"""Tests for __path__."""
def run_test(self, test, entry, path, *args):
with util.mock_modules('pkg.__init__') as importer:
importer['pkg'].__path__ = path
importer.load_module('pkg')
test('pkg.hit', entry, *args)
@import_util.importlib_only # XXX Unknown reason why this fails.
def test_order(self):
self.run_test(self.order_test, 'second', ('first', 'second'), ['first',
'second'])
def test_cache_use(self):
location = "I'm here!"
self.run_test(self.cache_use_test, location, [location])
def test_hooks_order(self):
location = "I'm here!"
self.run_test(self.hooks_order_test, location, [location])
def test_path_argument(self):
module = imp.new_module('pkg')
module.__path__ = ['random __path__']
name = 'pkg.whatever'
sys.modules['pkg'] = module
with util.uncache('pkg', name):
self.path_argument_test(name)
class FinderTests(unittest.TestCase):
"""Tests for SysPathImporter."""

View File

@ -1,5 +1,5 @@
import functools
import importlib
import importlib._bootstrap
using___import__ = False
@ -9,7 +9,8 @@ def import_(*args, **kwargs):
"""Delegate to allow for injecting different implementations of import."""
if using___import__:
return __import__(*args, **kwargs)
return importlib.Import()(*args, **kwargs)
#return importlib.Import()(*args, **kwargs)
return importlib._bootstrap._import(*args, **kwargs)
def importlib_only(fxn):

View File

@ -1,6 +1,8 @@
import unittest
import importlib
from . import util
import imp
import importlib
import sys
import unittest
class ImportModuleTests(unittest.TestCase):
@ -33,6 +35,7 @@ class ImportModuleTests(unittest.TestCase):
relative_name = '.{0}'.format(module_name)
with util.mock_modules(pkg_long_name, absolute_name) as mock:
with util.import_state(meta_path=[mock]):
importlib.import_module(pkg_name)
module = importlib.import_module(relative_name, pkg_name)
self.assertEqual(module.__name__, absolute_name)
@ -44,6 +47,7 @@ class ImportModuleTests(unittest.TestCase):
name = '{0}.mod'.format(pkg_name)
with util.mock_modules(pkg_long_name, name) as mock:
with util.import_state(meta_path=[mock]):
importlib.import_module(pkg_name)
module = importlib.import_module(name, pkg_name)
self.assertEqual(module.__name__, name)