Refactor source and bytecode file loaders in importlib so that there

are source-only and source/bytecode loaders.
This commit is contained in:
Brett Cannon 2009-02-21 05:41:15 +00:00
parent 0515619dbc
commit 91cf882b36
4 changed files with 163 additions and 149 deletions

View File

@ -1,31 +1,6 @@
to do
/////
* Refactor source/bytecode finder/loader code such that bytecode support is a
subclass of source support (makes it nicer for VMs that don't use CPython
bytecode).
+ PyLoader (for ABC)
- load_module for source only
- get_code for source only
+ PyFileLoader(PyLoader)
- get_data
- source_mtime
- source_path
+PyPycLoader (PyLoader, for ABC)
- load_module for source and bytecode
- get_code for source and bytecode
+ PyPycFileLoader(PyPycLoader, PyFileLoader)
- bytecode_path
- write_bytecode
* Implement PEP 302 protocol for loaders (should just be a matter of testing).
+ Source/bytecode.
@ -42,7 +17,6 @@ to do
* load_module
- (?) Importer(Finder, Loader)
- ResourceLoader(Loader)
* get_data
@ -89,6 +63,8 @@ to do
* Add leading underscores to all objects in importlib._bootstrap that are not
publicly exposed.
* Reorder importlib/_bootstrap.py so definitions are not in inverted order.
* Make sure that there is documentation *somewhere* fully explaining the
semantics of import that can be referenced from the package's documentation
(even if it is in the package documentation itself, although it might be best

View File

@ -315,17 +315,124 @@ def module_for_loader(fxn):
return decorated
class _PyFileLoader:
# XXX Still smart to have this as a separate class? Or would it work
# better to integrate with PyFileFinder? Could cache _is_pkg info.
# FileFinder can be changed to return self instead of a specific loader
# call. Otherwise _base_path can be calculated on the fly without issue if
# it is known whether a module should be treated as a path or package to
# minimize stat calls. Could even go as far as to stat the directory the
# importer is in to detect changes and then cache all the info about what
# files were found (if stating directories is platform-dependent).
class PyLoader:
"""Load a Python source or bytecode file."""
"""Loader base class for Python source.
Requires implementing the optional PEP 302 protocols as well as
source_mtime and source_path.
"""
@module_for_loader
def load_module(self, module):
"""Load a source module."""
return _load_module(module)
def _load_module(self, module):
"""Initialize a module from source."""
name = module.__name__
source_path = self.source_path(name)
code_object = self.get_code(module.__name__)
if not hasattr(module, '__file__'):
module.__file__ = source_path
if self.is_package(name):
module.__path__ = [module.__file__.rsplit(path_sep, 1)[0]]
module.__package__ = module.__name__
if not hasattr(module, '__path__'):
module.__package__ = module.__package__.rpartition('.')[0]
exec(code_object, module.__dict__)
return module
def get_code(self, fullname):
"""Get a code object from source."""
source_path = self.source_path(fullname)
source = self.get_data(source_path)
# Convert to universal newlines.
line_endings = b'\n'
for index, c in enumerate(source):
if c == ord(b'\n'):
break
elif c == ord(b'\r'):
line_endings = b'\r'
try:
if source[index+1] == ord(b'\n'):
line_endings += b'\n'
except IndexError:
pass
break
if line_endings != b'\n':
source = source.replace(line_endings, b'\n')
return compile(source, source_path, 'exec', dont_inherit=True)
class PyPycLoader(PyLoader):
"""Loader base class for Python source and bytecode.
Requires implementing the methods needed for PyLoader as well as
bytecode_path and write_bytecode.
"""
@module_for_loader
def load_module(self, module):
"""Load a module from source or bytecode."""
name = module.__name__
source_path = self.source_path(name)
bytecode_path = self.bytecode_path(name)
module.__file__ = source_path if source_path else bytecode_path
return self._load_module(module)
def get_code(self, fullname):
"""Get a code object from source or bytecode."""
# XXX Care enough to make sure this call does not happen if the magic
# number is bad?
source_timestamp = self.source_mtime(fullname)
# Try to use bytecode if it is available.
bytecode_path = self.bytecode_path(fullname)
if bytecode_path:
data = self.get_data(bytecode_path)
magic = data[:4]
pyc_timestamp = marshal._r_long(data[4:8])
bytecode = data[8:]
try:
# Verify that the magic number is valid.
if imp.get_magic() != magic:
raise ImportError("bad magic number")
# Verify that the bytecode is not stale (only matters when
# there is source to fall back on.
if source_timestamp:
if pyc_timestamp < source_timestamp:
raise ImportError("bytecode is stale")
except ImportError:
# If source is available give it a shot.
if source_timestamp is not None:
pass
else:
raise
else:
# Bytecode seems fine, so try to use it.
# XXX If the bytecode is ill-formed, would it be beneficial to
# try for using source if available and issue a warning?
return marshal.loads(bytecode)
elif source_timestamp is None:
raise ImportError("no source or bytecode available to create code "
"object for {0!r}".format(fullname))
# Use the source.
code_object = super().get_code(fullname)
# Generate bytecode and write it out.
if not sys.dont_write_bytecode:
data = bytearray(imp.get_magic())
data.extend(marshal._w_long(source_timestamp))
data.extend(marshal.dumps(code_object))
self.write_bytecode(fullname, data)
return code_object
class PyFileLoader(PyLoader):
"""Load a Python source file."""
def __init__(self, name, path, is_pkg):
self._name = name
@ -354,29 +461,6 @@ class _PyFileLoader:
# Not a property so that it is easy to override.
return self._find_path(imp.PY_SOURCE)
@check_name
def bytecode_path(self, fullname):
"""Return the path to a bytecode file, or None if one does not
exist."""
# Not a property for easy overriding.
return self._find_path(imp.PY_COMPILED)
@module_for_loader
def load_module(self, module):
"""Load a Python source or bytecode module."""
name = module.__name__
source_path = self.source_path(name)
bytecode_path = self.bytecode_path(name)
code_object = self.get_code(module.__name__)
module.__file__ = source_path if source_path else bytecode_path
module.__loader__ = self
if self.is_package(name):
module.__path__ = [module.__file__.rsplit(path_sep, 1)[0]]
module.__package__ = module.__name__
if not hasattr(module, '__path__'):
module.__package__ = module.__package__.rpartition('.')[0]
exec(code_object, module.__dict__)
return module
@check_name
def source_mtime(self, name):
@ -405,6 +489,34 @@ class _PyFileLoader:
# anything other than UTF-8.
return open(source_path, encoding=encoding).read()
def get_data(self, path):
"""Return the data from path as raw bytes."""
return _fileio._FileIO(path, 'r').read()
@check_name
def is_package(self, fullname):
"""Return a boolean based on whether the module is a package.
Raises ImportError (like get_source) if the loader cannot handle the
package.
"""
return self._is_pkg
# XXX Rename _PyFileLoader throughout
class PyPycFileLoader(PyPycLoader, PyFileLoader):
"""Load a module from a source or bytecode file."""
@check_name
def bytecode_path(self, fullname):
"""Return the path to a bytecode file, or None if one does not
exist."""
# Not a property for easy overriding.
return self._find_path(imp.PY_COMPILED)
@check_name
def write_bytecode(self, name, data):
"""Write out 'data' for the specified module, returning a boolean
@ -428,82 +540,6 @@ class _PyFileLoader:
else:
raise
def get_code(self, name):
"""Return the code object for the module."""
# XXX Care enough to make sure this call does not happen if the magic
# number is bad?
source_timestamp = self.source_mtime(name)
# Try to use bytecode if it is available.
bytecode_path = self.bytecode_path(name)
if bytecode_path:
data = self.get_data(bytecode_path)
magic = data[:4]
pyc_timestamp = marshal._r_long(data[4:8])
bytecode = data[8:]
try:
# Verify that the magic number is valid.
if imp.get_magic() != magic:
raise ImportError("bad magic number")
# Verify that the bytecode is not stale (only matters when
# there is source to fall back on.
if source_timestamp:
if pyc_timestamp < source_timestamp:
raise ImportError("bytcode is stale")
except ImportError:
# If source is available give it a shot.
if source_timestamp is not None:
pass
else:
raise
else:
# Bytecode seems fine, so try to use it.
# XXX If the bytecode is ill-formed, would it be beneficial to
# try for using source if available and issue a warning?
return marshal.loads(bytecode)
elif source_timestamp is None:
raise ImportError("no source or bytecode available to create code "
"object for {0!r}".format(name))
# Use the source.
source_path = self.source_path(name)
source = self.get_data(source_path)
# Convert to universal newlines.
line_endings = b'\n'
for index, c in enumerate(source):
if c == ord(b'\n'):
break
elif c == ord(b'\r'):
line_endings = b'\r'
try:
if source[index+1] == ord(b'\n'):
line_endings += b'\n'
except IndexError:
pass
break
if line_endings != b'\n':
source = source.replace(line_endings, b'\n')
code_object = compile(source, source_path, 'exec', dont_inherit=True)
# Generate bytecode and write it out.
if not sys.dont_write_bytecode:
data = bytearray(imp.get_magic())
data.extend(marshal._w_long(source_timestamp))
data.extend(marshal.dumps(code_object))
self.write_bytecode(name, data)
return code_object
def get_data(self, path):
"""Return the data from path as raw bytes."""
return _fileio._FileIO(path, 'r').read()
@check_name
def is_package(self, fullname):
"""Return a boolean based on whether the module is a package.
Raises ImportError (like get_source) if the loader cannot handle the
package.
"""
return self._is_pkg
class FileFinder:
@ -583,7 +619,7 @@ class PyFileFinder(FileFinder):
"""Importer for source/bytecode files."""
_possible_package = True
_loader = _PyFileLoader
_loader = PyFileLoader
def __init__(self, path_entry):
# Lack of imp during class creation means _suffixes is set here.
@ -597,6 +633,8 @@ class PyPycFileFinder(PyFileFinder):
"""Finder for source and bytecode files."""
_loader = PyPycFileLoader
def __init__(self, path_entry):
super().__init__(path_entry)
self._suffixes += suffix_list(imp.PY_COMPILED)

View File

@ -19,7 +19,7 @@ class SimpleTest(unittest.TestCase):
# [basic]
def test_module(self):
with source_util.create_modules('_temp') as mapping:
loader = importlib._PyFileLoader('_temp', mapping['_temp'], False)
loader = importlib.PyPycFileLoader('_temp', mapping['_temp'], False)
module = loader.load_module('_temp')
self.assert_('_temp' in sys.modules)
check = {'__name__': '_temp', '__file__': mapping['_temp'],
@ -29,7 +29,7 @@ class SimpleTest(unittest.TestCase):
def test_package(self):
with source_util.create_modules('_pkg.__init__') as mapping:
loader = importlib._PyFileLoader('_pkg', mapping['_pkg.__init__'],
loader = importlib.PyPycFileLoader('_pkg', mapping['_pkg.__init__'],
True)
module = loader.load_module('_pkg')
self.assert_('_pkg' in sys.modules)
@ -42,7 +42,7 @@ class SimpleTest(unittest.TestCase):
def test_lacking_parent(self):
with source_util.create_modules('_pkg.__init__', '_pkg.mod')as mapping:
loader = importlib._PyFileLoader('_pkg.mod', mapping['_pkg.mod'],
loader = importlib.PyPycFileLoader('_pkg.mod', mapping['_pkg.mod'],
False)
module = loader.load_module('_pkg.mod')
self.assert_('_pkg.mod' in sys.modules)
@ -57,7 +57,7 @@ class SimpleTest(unittest.TestCase):
def test_module_reuse(self):
with source_util.create_modules('_temp') as mapping:
loader = importlib._PyFileLoader('_temp', mapping['_temp'], False)
loader = importlib.PyPycFileLoader('_temp', mapping['_temp'], False)
module = loader.load_module('_temp')
module_id = id(module)
module_dict_id = id(module.__dict__)
@ -87,7 +87,7 @@ class SimpleTest(unittest.TestCase):
setattr(orig_module, attr, value)
with open(mapping[name], 'w') as file:
file.write('+++ bad syntax +++')
loader = importlib._PyFileLoader('_temp', mapping['_temp'], False)
loader = importlib.PyPycFileLoader('_temp', mapping['_temp'], False)
self.assertRaises(SyntaxError, loader.load_module, name)
for attr in attributes:
self.assertEqual(getattr(orig_module, attr), value)
@ -97,7 +97,7 @@ class SimpleTest(unittest.TestCase):
with source_util.create_modules('_temp') as mapping:
with open(mapping['_temp'], 'w') as file:
file.write('=')
loader = importlib._PyFileLoader('_temp', mapping['_temp'], False)
loader = importlib.PyPycFileLoader('_temp', mapping['_temp'], False)
self.assertRaises(SyntaxError, loader.load_module, '_temp')
self.assert_('_temp' not in sys.modules)
@ -112,7 +112,7 @@ class DontWriteBytecodeTest(unittest.TestCase):
@source_util.writes_bytecode
def run_test(self, assertion):
with source_util.create_modules('_temp') as mapping:
loader = importlib._PyFileLoader('_temp', mapping['_temp'], False)
loader = importlib.PyPycFileLoader('_temp', mapping['_temp'], False)
loader.load_module('_temp')
bytecode_path = source_util.bytecode_path(mapping['_temp'])
assertion(bytecode_path)
@ -144,7 +144,7 @@ class BadDataTest(unittest.TestCase):
with open(bytecode_path, 'r+b') as file:
file.seek(0)
file.write(b'\x00\x00\x00\x00')
loader = importlib._PyFileLoader('_temp', mapping['_temp'], False)
loader = importlib.PyPycFileLoader('_temp', mapping['_temp'], False)
self.assertRaises(ImportError, loader.load_module, '_temp')
self.assert_('_temp' not in sys.modules)
@ -159,7 +159,7 @@ class SourceBytecodeInteraction(unittest.TestCase):
"""
def import_(self, file, module, *, pkg=False):
loader = importlib._PyFileLoader(module, file, pkg)
loader = importlib.PyPycFileLoader(module, file, pkg)
return loader.load_module(module)
def run_test(self, test, *create, pkg=False):
@ -171,7 +171,7 @@ class SourceBytecodeInteraction(unittest.TestCase):
import_name = test.rsplit('.', 1)[0]
else:
import_name = test
loader = importlib._PyFileLoader(import_name, mapping[test], pkg)
loader = importlib.PyPycFileLoader(import_name, mapping[test], pkg)
# Because some platforms only have a granularity to the second for
# atime you can't check the physical files. Instead just make it an
# exception trigger if source was read.
@ -212,7 +212,7 @@ class BadBytecodeTest(unittest.TestCase):
"""
def import_(self, file, module_name):
loader = importlib._PyFileLoader(module_name, file, False)
loader = importlib.PyPycFileLoader(module_name, file, False)
module = loader.load_module(module_name)
self.assert_(module_name in sys.modules)

View File

@ -35,7 +35,7 @@ class EncodingTest(unittest.TestCase):
with source_util.create_modules(self.module_name) as mapping:
with open(mapping[self.module_name], 'wb')as file:
file.write(source)
loader = importlib._PyFileLoader(self.module_name,
loader = importlib.PyPycFileLoader(self.module_name,
mapping[self.module_name], False)
return loader.load_module(self.module_name)
@ -96,7 +96,7 @@ class LineEndingTest(unittest.TestCase):
with source_util.create_modules(module_name) as mapping:
with open(mapping[module_name], 'wb') as file:
file.write(source)
loader = importlib._PyFileLoader(module_name, mapping[module_name],
loader = importlib.PyPycFileLoader(module_name, mapping[module_name],
False)
return loader.load_module(module_name)