Issue #14043: Speed up importlib's _FileFinder by at least 8x, and add a new importlib.invalidate_caches() function.

importlib is now often faster than imp.find_module() at finding modules.
This commit is contained in:
Antoine Pitrou 2012-02-20 01:48:16 +01:00
parent 336b2f45e5
commit c541f8ef40
7 changed files with 90 additions and 50 deletions

View File

@ -86,6 +86,14 @@ Functions
that was imported (e.g. ``pkg.mod``), while :func:`__import__` returns the
top-level package or module (e.g. ``pkg``).
.. function:: invalidate_caches()
Invalidate importlib's internal caches. Calling this function may be
needed if some modules are installed while your program is running and
you expect the program to notice the changes.
.. versionadded:: 3.3
:mod:`importlib.abc` -- Abstract base classes related to import
---------------------------------------------------------------

View File

@ -18,7 +18,7 @@ References on import:
http://www.python.org/dev/peps/pep-0328
"""
__all__ = ['__import__', 'import_module']
__all__ = ['__import__', 'import_module', 'invalidate_caches']
from . import _bootstrap
@ -37,7 +37,7 @@ _bootstrap._setup(sys, imp)
# Public API #########################################################
from ._bootstrap import __import__
from ._bootstrap import __import__, invalidate_caches
def import_module(name, package=None):

View File

@ -21,31 +21,16 @@ work. One should use importlib as the public-facing version of this module.
CASE_INSENSITIVE_PLATFORMS = 'win', 'cygwin', 'darwin'
def _case_insensitive_ok(directory, check):
"""Check whether the directory contains an entry whose name exactly
matches 'check' (a case-sensitive comparison against the listing).
If PYTHONCASEOK is a defined environment variable then skip the
case-sensitivity check.
"""
if b'PYTHONCASEOK' not in _os.environ:
if not directory:
directory = '.'
return check in _os.listdir(directory)
def _relax_case():
"""True if filenames must be checked case-insensitively."""
if any(map(sys.platform.startswith, CASE_INSENSITIVE_PLATFORMS)):
def _relax_case():
return b'PYTHONCASEOK' in _os.environ
else:
return True
def _case_sensitive_ok(directory, check):
"""Under case-sensitive filesystems always assume the case matches.
Since other code does the file existence check, that subsumes a
case-sensitivity check.
"""
return True
_case_ok = None
def _relax_case():
return False
return _relax_case
# TODO: Expose from marshal
@ -172,6 +157,18 @@ code_type = type(_wrap.__code__)
# Finder/loader utility code ##################################################
_cache_refresh = 0
def invalidate_caches():
"""Invalidate importlib's internal caches.
Calling this function may be needed if some modules are installed while
your program is running and you expect the program to notice the changes.
This only increments the module-level _cache_refresh counter; no cache is
cleared here. _FileFinder.find_module() compares that counter with the
finder's own _cache_refresh copy and re-lists its directory when they differ.
"""
global _cache_refresh
_cache_refresh += 1
def set_package(fxn):
"""Set __package__ on the returned module."""
def set_package_wrapper(*args, **kwargs):
@ -708,7 +705,7 @@ class PathFinder:
"""
if path == '':
path = _os.getcwd()
path = '.'
try:
finder = sys.path_importer_cache[path]
except KeyError:
@ -760,29 +757,55 @@ class _FileFinder:
for suffix in detail.suffixes)
self.packages = packages
self.modules = modules
self.path = path
# Base (directory) path
self.path = path or '.'
self._path_mtime = -1
self._path_cache = set()
self._cache_refresh = 0
def find_module(self, fullname):
"""Try to find a loader for the specified module."""
tail_module = fullname.rpartition('.')[2]
base_path = _path_join(self.path, tail_module)
if _path_isdir(base_path) and _case_ok(self.path, tail_module):
for suffix, loader in self.packages:
init_filename = '__init__' + suffix
full_path = _path_join(base_path, init_filename)
if (_path_isfile(full_path) and
_case_ok(base_path, init_filename)):
return loader(fullname, full_path)
else:
msg = "Not importing directory {}: missing __init__"
_warnings.warn(msg.format(base_path), ImportWarning)
if _relax_case():
tail_module = tail_module.lower()
try:
mtime = _os.stat(self.path).st_mtime
except OSError:
mtime = -1
if mtime != self._path_mtime or _cache_refresh != self._cache_refresh:
self._fill_cache()
self._path_mtime = mtime
self._cache_refresh = _cache_refresh
cache = self._path_cache
if tail_module in cache:
base_path = _path_join(self.path, tail_module)
if _path_isdir(base_path):
for suffix, loader in self.packages:
init_filename = '__init__' + suffix
full_path = _path_join(base_path, init_filename)
if _path_isfile(full_path):
return loader(fullname, full_path)
else:
msg = "Not importing directory {}: missing __init__"
_warnings.warn(msg.format(base_path), ImportWarning)
for suffix, loader in self.modules:
mod_filename = tail_module + suffix
full_path = _path_join(self.path, mod_filename)
if _path_isfile(full_path) and _case_ok(self.path, mod_filename):
return loader(fullname, full_path)
if mod_filename in cache:
full_path = _path_join(self.path, mod_filename)
if _path_isfile(full_path):
return loader(fullname, full_path)
return None
def _fill_cache(self):
    """Rebuild the set of entry names found in this finder's directory.

    The listing of self.path is stored in self._path_cache, which
    find_module() consults before touching the filesystem for a candidate
    package directory or module file.  When _relax_case() is true the
    names are stored lower-cased, matching the lower-cased module name
    that find_module() looks up in that case.

    A directory that cannot be listed (removed, unreadable, replaced by a
    file) produces an empty cache instead of an exception, in line with
    how find_module() already tolerates OSError from _os.stat().
    """
    try:
        contents = _os.listdir(self.path)
    except OSError:
        # find_module() also calls this when only the global refresh
        # counter changed, i.e. even if stat() on the directory has just
        # failed.  Letting the error escape would surface an OSError from
        # the import system where "module not found" is the right answer.
        contents = []
    if _relax_case():
        self._path_cache = {fn.lower() for fn in contents}
    else:
        self._path_cache = set(contents)
class _SourceFinderDetails:
loader = _SourceFileLoader
@ -1060,7 +1083,7 @@ def _setup(sys_module, imp_module):
modules, those two modules must be explicitly passed in.
"""
global _case_ok, imp, sys
global imp, sys
imp = imp_module
sys = sys_module
@ -1093,12 +1116,8 @@ def _setup(sys_module, imp_module):
raise ImportError('importlib requires posix or nt')
setattr(self_module, '_os', os_module)
setattr(self_module, 'path_sep', path_sep)
if any(sys_module.platform.startswith(x)
for x in CASE_INSENSITIVE_PLATFORMS):
_case_ok = _case_insensitive_ok
else:
_case_ok = _case_sensitive_ok
# Constants
setattr(self_module, '_relax_case', _relax_case())
def _install(sys_module, imp_module):

View File

@ -78,11 +78,11 @@ class FinderTests(unittest.TestCase):
path = ''
module = '<test module>'
importer = util.mock_modules(module)
hook = import_util.mock_path_hook(os.getcwd(), importer=importer)
hook = import_util.mock_path_hook(os.curdir, importer=importer)
with util.import_state(path=[path], path_hooks=[hook]):
loader = machinery.PathFinder.find_module(module)
self.assertIs(loader, importer)
self.assertIn(os.getcwd(), sys.path_importer_cache)
self.assertIn(os.curdir, sys.path_importer_cache)
class DefaultPathFinderTests(unittest.TestCase):

View File

@ -2,6 +2,7 @@ import builtins
import imp
from importlib.test.import_ import test_relative_imports
from importlib.test.import_ import util as importlib_util
import importlib
import marshal
import os
import platform
@ -34,6 +35,7 @@ class ImportTests(unittest.TestCase):
def setUp(self):
remove_files(TESTFN)
importlib.invalidate_caches()
def tearDown(self):
unload(TESTFN)
@ -107,6 +109,7 @@ class ImportTests(unittest.TestCase):
create_empty_file(fname)
fn = imp.cache_from_source(fname)
unlink(fn)
importlib.invalidate_caches()
__import__(TESTFN)
if not os.path.exists(fn):
self.fail("__import__ did not result in creation of "
@ -260,6 +263,7 @@ class ImportTests(unittest.TestCase):
os.remove(source)
del sys.modules[TESTFN]
make_legacy_pyc(source)
importlib.invalidate_caches()
mod = __import__(TESTFN)
base, ext = os.path.splitext(mod.__file__)
self.assertIn(ext, ('.pyc', '.pyo'))
@ -358,6 +362,7 @@ func_filename = func.__code__.co_filename
with open(self.file_name, "w") as f:
f.write(self.module_source)
sys.path.insert(0, self.dir_name)
importlib.invalidate_caches()
def tearDown(self):
sys.path[:] = self.sys_path
@ -552,6 +557,7 @@ class PycacheTests(unittest.TestCase):
with open(self.source, 'w') as fp:
print('# This is a test file written by test_import.py', file=fp)
sys.path.insert(0, os.curdir)
importlib.invalidate_caches()
def tearDown(self):
assert sys.path[0] == os.curdir, 'Unexpected sys.path[0]'
@ -599,6 +605,7 @@ class PycacheTests(unittest.TestCase):
pyc_file = make_legacy_pyc(self.source)
os.remove(self.source)
unload(TESTFN)
importlib.invalidate_caches()
m = __import__(TESTFN)
self.assertEqual(m.__file__,
os.path.join(os.curdir, os.path.relpath(pyc_file)))
@ -619,6 +626,7 @@ class PycacheTests(unittest.TestCase):
pyc_file = make_legacy_pyc(self.source)
os.remove(self.source)
unload(TESTFN)
importlib.invalidate_caches()
m = __import__(TESTFN)
self.assertEqual(m.__cached__,
os.path.join(os.curdir, os.path.relpath(pyc_file)))

View File

@ -6,6 +6,7 @@
import sys
import os
import shutil
import importlib
import unittest
from test.support import run_unittest, create_empty_file
@ -212,6 +213,7 @@ class LongReprTest(unittest.TestCase):
# Remember where we are
self.here = os.getcwd()
sys.path.insert(0, self.here)
importlib.invalidate_caches()
def tearDown(self):
actions = []

View File

@ -469,6 +469,9 @@ Core and Builtins
Library
-------
- Issue #14043: Speed up importlib's _FileFinder by at least 8x, and add a
new importlib.invalidate_caches() function.
- Issue #14001: CVE-2012-0845: xmlrpc: Fix an endless loop in
SimpleXMLRPCServer upon malformed POST request.