bpo-39791 native hooks for importlib.resources.files (GH-20576)

* Provide native .files support on SourceFileLoader.

* Add native importlib.resources.files() support to zipimporter. Remove fallback support.

* make regen-all

* 📜🤖 Added by blurb_it.

* Move 'files' into the ResourceReader so it can carry the relevant module name context.

* Create 'importlib.readers' module and add FileReader to it.

* Add zip reader and rely on it for a TraversableResources object on zipimporter.

* Remove TraversableAdapter, no longer needed.

* Update blurb.

* Replace backslashes with forward slashes.

* Incorporate changes from importlib_resources 2.0, finalizing the interface for extension via get_resource_reader.

Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com>
This commit is contained in:
Jason R. Coombs 2020-06-07 21:00:51 -04:00 committed by GitHub
parent 972ab03276
commit 843c277656
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 2131 additions and 2372 deletions

View File

@ -982,32 +982,10 @@ class FileLoader:
with _io.FileIO(path, 'r') as file: with _io.FileIO(path, 'r') as file:
return file.read() return file.read()
# ResourceReader ABC API.
@_check_name @_check_name
def get_resource_reader(self, module): def get_resource_reader(self, module):
if self.is_package(module): from importlib.readers import FileReader
return self return FileReader(self)
return None
def open_resource(self, resource):
    """Open *resource*, located in the directory of ``self.path``, with FileIO."""
    directory = _path_split(self.path)[0]
    return _io.FileIO(_path_join(directory, resource), 'r')
def resource_path(self, resource):
    """Return the path of *resource* inside the directory of ``self.path``.

    Raises FileNotFoundError when ``self.is_resource(resource)`` is false.
    """
    if self.is_resource(resource):
        return _path_join(_path_split(self.path)[0], resource)
    raise FileNotFoundError
def is_resource(self, name):
    """Return whether *name* is a file directly inside the directory of ``self.path``.

    A name containing ``path_sep`` is rejected without touching the
    file system.
    """
    return path_sep not in name and _path_isfile(
        _path_join(_path_split(self.path)[0], name))
def contents(self):
    """Return an iterator over the entries of the directory of ``self.path``."""
    directory = _path_split(self.path)[0]
    entries = _os.listdir(directory)
    return iter(entries)
class SourceFileLoader(FileLoader, SourceLoader): class SourceFileLoader(FileLoader, SourceLoader):

View File

@ -1,9 +1,72 @@
import os import os
import pathlib import pathlib
import zipfile
import tempfile import tempfile
import functools import functools
import contextlib import contextlib
import types
import importlib
from typing import Union, Any, Optional
from .abc import ResourceReader
Package = Union[types.ModuleType, str]
def files(package):
    """
    Return a Traversable resource for *package*.

    *package* is first validated and resolved by ``get_package``; the
    resulting module is then handed to ``from_package``.
    """
    module = get_package(package)
    return from_package(module)
def normalize_path(path):
    # type: (Any) -> str
    """Return *path* as a string, insisting that it is a bare file name.

    Raises ValueError if ``str(path)`` has a directory component.
    """
    directory, base_name = os.path.split(str(path))
    if not directory:
        return base_name
    raise ValueError('{!r} must be only a file name'.format(path))
def get_resource_reader(package):
    # type: (types.ModuleType) -> Optional[ResourceReader]
    """
    Return the reader produced by the package loader's
    ``get_resource_reader`` hook, or None when the loader has no such
    attribute.
    """
    # The hook is probed with getattr() rather than an issubclass() check:
    # abc's __subclasscheck__() hook wants to create a weak reference to
    # the object, but zipimport.zipimporter does not support weak
    # references, resulting in a TypeError.
    spec = package.__spec__
    factory = getattr(spec.loader, 'get_resource_reader', None)
    return None if factory is None else factory(spec.name)
def resolve(cand):
    # type: (Package) -> types.ModuleType
    """Return *cand* itself if it is a module object; otherwise import it by name."""
    if isinstance(cand, types.ModuleType):
        return cand
    return importlib.import_module(cand)
def get_package(package):
    # type: (Package) -> types.ModuleType
    """Resolve *package* (a name or a module object) to a module.

    Raises TypeError when the resolved module's spec has
    ``submodule_search_locations`` set to None, i.e. it is not a package.
    """
    module = resolve(package)
    if module.__spec__.submodule_search_locations is not None:
        return module
    raise TypeError('{!r} is not a package'.format(package))
def from_package(package): def from_package(package):
@ -12,27 +75,8 @@ def from_package(package):
""" """
spec = package.__spec__ spec = package.__spec__
return from_traversable_resources(spec) or fallback_resources(spec) reader = spec.loader.get_resource_reader(spec.name)
return reader.files()
def from_traversable_resources(spec):
    """
    Return ``spec.loader.files()``.

    If that call raises AttributeError (for instance because the loader
    has no ``files`` method), return None instead.
    """
    try:
        return spec.loader.files()
    except AttributeError:
        return None
def fallback_resources(spec):
    """Best-effort Traversable for *spec*.

    If the loader exposes an ``archive`` containing the package directory,
    return a ``zipfile.Path`` pointing at that directory inside the
    archive.  Any exception raised along the way is swallowed and the
    plain directory of ``spec.origin`` is returned instead.
    """
    directory = pathlib.Path(spec.origin).parent
    with contextlib.suppress(Exception):
        archive = spec.loader.archive
        inner = directory.relative_to(archive)
        return zipfile.Path(archive, str(inner) + '/')
    return directory
@contextlib.contextmanager @contextlib.contextmanager

View File

@ -468,7 +468,7 @@ class TraversableResources(ResourceReader):
raise FileNotFoundError(resource) raise FileNotFoundError(resource)
def is_resource(self, path): def is_resource(self, path):
return self.files().joinpath(path).isfile() return self.files().joinpath(path).is_file()
def contents(self): def contents(self):
return (item.name for item in self.files().iterdir()) return (item.name for item in self.files().iterdir())

30
Lib/importlib/readers.py Normal file
View File

@ -0,0 +1,30 @@
import zipfile
import pathlib
from . import abc
class FileReader(abc.TraversableResources):
    """Resource reader rooted at the parent directory of ``loader.path``."""

    def __init__(self, loader):
        module_file = pathlib.Path(loader.path)
        self.path = module_file.parent

    def files(self):
        """Return the directory computed in ``__init__``."""
        return self.path
class ZipReader(FileReader):
    """Resource reader rooted at a package directory inside ``loader.archive``."""

    def __init__(self, loader, module):
        # Only the last component of the dotted module name is appended to
        # the loader's prefix.
        package_name = module.rpartition('.')[2]
        # Backslashes in the prefix are normalised to forward slashes
        # before it is used as an archive-internal path.
        normalized_prefix = loader.prefix.replace('\\', '/')
        self.path = zipfile.Path(
            loader.archive, normalized_prefix + package_name + '/')

    def open_resource(self, resource):
        """Open *resource* through the inherited implementation.

        A KeyError raised by the parent is re-raised as FileNotFoundError
        carrying the KeyError's first argument.
        """
        try:
            stream = super().open_resource(resource)
        except KeyError as exc:
            raise FileNotFoundError(exc.args[0])
        return stream

    def is_resource(self, path):
        # workaround for `zipfile.Path.is_file` returning true
        # for non-existent paths.
        candidate = self.files().joinpath(path)
        return candidate.is_file() and candidate.exists()

View File

@ -1,15 +1,13 @@
import os import os
from . import abc as resources_abc
from . import _common from . import _common
from ._common import as_file from ._common import as_file, files
from contextlib import contextmanager, suppress from contextlib import contextmanager, suppress
from importlib import import_module
from importlib.abc import ResourceLoader from importlib.abc import ResourceLoader
from io import BytesIO, TextIOWrapper from io import BytesIO, TextIOWrapper
from pathlib import Path from pathlib import Path
from types import ModuleType from types import ModuleType
from typing import ContextManager, Iterable, Optional, Union from typing import ContextManager, Iterable, Union
from typing import cast from typing import cast
from typing.io import BinaryIO, TextIO from typing.io import BinaryIO, TextIO
@ -33,60 +31,11 @@ Package = Union[str, ModuleType]
Resource = Union[str, os.PathLike] Resource = Union[str, os.PathLike]
def _resolve(name) -> ModuleType:
    """Return *name* unchanged if it has ``__spec__``; otherwise import it."""
    return name if hasattr(name, '__spec__') else import_module(name)
def _get_package(package) -> ModuleType:
    """Resolve *package* (a name or a module object) to a module.

    A name is imported.  Raises TypeError when the resolved module's spec
    has ``submodule_search_locations`` set to None, i.e. it is not a
    package.
    """
    resolved = _resolve(package)
    if resolved.__spec__.submodule_search_locations is not None:
        return resolved
    raise TypeError('{!r} is not a package'.format(package))
def _normalize_path(path) -> str:
    """Return the file-name part of *path*, insisting there is nothing else.

    Raises ValueError if *path* has a directory component.
    """
    directory, base_name = os.path.split(path)
    if not directory:
        return base_name
    raise ValueError('{!r} must be only a file name'.format(path))
def _get_resource_reader(
        package: ModuleType) -> Optional[resources_abc.ResourceReader]:
    """Return the reader from the loader's ``get_resource_reader`` hook, or None."""
    # The hook is probed with hasattr() rather than an issubclass() check:
    # abc's __subclasscheck__() hook wants to create a weak reference to
    # the object, but zipimport.zipimporter does not support weak
    # references, resulting in a TypeError.
    spec = package.__spec__
    loader = spec.loader
    if not hasattr(loader, 'get_resource_reader'):
        return None
    return cast(resources_abc.ResourceReader,
                loader.get_resource_reader(spec.name))
def _check_location(package):
    """Raise FileNotFoundError unless the package's spec has an origin and a location."""
    spec = package.__spec__
    has_origin = spec.origin is not None
    if not (has_origin and spec.has_location):
        raise FileNotFoundError(f'Package has no location {package!r}')
def open_binary(package: Package, resource: Resource) -> BinaryIO: def open_binary(package: Package, resource: Resource) -> BinaryIO:
"""Return a file-like object opened for binary reading of the resource.""" """Return a file-like object opened for binary reading of the resource."""
resource = _normalize_path(resource) resource = _common.normalize_path(resource)
package = _get_package(package) package = _common.get_package(package)
reader = _get_resource_reader(package) reader = _common.get_resource_reader(package)
if reader is not None: if reader is not None:
return reader.open_resource(resource) return reader.open_resource(resource)
absolute_package_path = os.path.abspath( absolute_package_path = os.path.abspath(
@ -140,13 +89,6 @@ def read_text(package: Package,
return fp.read() return fp.read()
def files(package: Package) -> resources_abc.Traversable:
    """
    Return a Traversable resource for *package*.

    *package* is resolved by ``_get_package`` and the resulting module is
    passed to ``_common.from_package``.
    """
    module = _get_package(package)
    return _common.from_package(module)
def path( def path(
package: Package, resource: Resource, package: Package, resource: Resource,
) -> 'ContextManager[Path]': ) -> 'ContextManager[Path]':
@ -158,17 +100,18 @@ def path(
raised if the file was deleted prior to the context manager raised if the file was deleted prior to the context manager
exiting). exiting).
""" """
reader = _get_resource_reader(_get_package(package)) reader = _common.get_resource_reader(_common.get_package(package))
return ( return (
_path_from_reader(reader, resource) _path_from_reader(reader, resource)
if reader else if reader else
_common.as_file(files(package).joinpath(_normalize_path(resource))) _common.as_file(
_common.files(package).joinpath(_common.normalize_path(resource)))
) )
@contextmanager @contextmanager
def _path_from_reader(reader, resource): def _path_from_reader(reader, resource):
norm_resource = _normalize_path(resource) norm_resource = _common.normalize_path(resource)
with suppress(FileNotFoundError): with suppress(FileNotFoundError):
yield Path(reader.resource_path(norm_resource)) yield Path(reader.resource_path(norm_resource))
return return
@ -182,9 +125,9 @@ def is_resource(package: Package, name: str) -> bool:
Directories are *not* resources. Directories are *not* resources.
""" """
package = _get_package(package) package = _common.get_package(package)
_normalize_path(name) _common.normalize_path(name)
reader = _get_resource_reader(package) reader = _common.get_resource_reader(package)
if reader is not None: if reader is not None:
return reader.is_resource(name) return reader.is_resource(name)
package_contents = set(contents(package)) package_contents = set(contents(package))
@ -200,8 +143,8 @@ def contents(package: Package) -> Iterable[str]:
not considered resources. Use `is_resource()` on each entry returned here not considered resources. Use `is_resource()` on each entry returned here
to check if it is a resource or not. to check if it is a resource or not.
""" """
package = _get_package(package) package = _common.get_package(package)
reader = _get_resource_reader(package) reader = _common.get_resource_reader(package)
if reader is not None: if reader is not None:
return reader.contents() return reader.contents()
# Is the package a namespace package? By definition, namespace packages # Is the package a namespace package? By definition, namespace packages

View File

@ -280,11 +280,8 @@ class zipimporter:
return None return None
except ZipImportError: except ZipImportError:
return None return None
if not _ZipImportResourceReader._registered: from importlib.readers import ZipReader
from importlib.abc import ResourceReader return ZipReader(self, fullname)
ResourceReader.register(_ZipImportResourceReader)
_ZipImportResourceReader._registered = True
return _ZipImportResourceReader(self, fullname)
def __repr__(self): def __repr__(self):
@ -719,74 +716,3 @@ def _get_module_code(self, fullname):
return code, ispackage, modpath return code, ispackage, modpath
else: else:
raise ZipImportError(f"can't find module {fullname!r}", name=fullname) raise ZipImportError(f"can't find module {fullname!r}", name=fullname)
class _ZipImportResourceReader:
    """Private class used to support ZipImport.get_resource_reader().

    This class is allowed to reference all the innards and private parts of
    the zipimporter.
    """
    _registered = False

    def __init__(self, zipimporter, fullname):
        self.zipimporter = zipimporter
        self.fullname = fullname

    def open_resource(self, resource):
        """Return a BytesIO holding the bytes of *resource*.

        Raises FileNotFoundError (carrying the archive-relative path) when
        the zipimporter's get_data() raises OSError.
        """
        fullname_as_path = self.fullname.replace('.', '/')
        path = f'{fullname_as_path}/{resource}'
        from io import BytesIO
        try:
            return BytesIO(self.zipimporter.get_data(path))
        except OSError:
            raise FileNotFoundError(path)

    def resource_path(self, resource):
        # All resources are in the zip file, so there is no path to the file.
        # Raising FileNotFoundError tells the higher level API to extract the
        # binary data and create a temporary file.
        raise FileNotFoundError

    def is_resource(self, name):
        """Return True if the zipimporter can supply data for *name*."""
        # Maybe we could do better, but if we can get the data, it's a
        # resource.  Otherwise it isn't.
        fullname_as_path = self.fullname.replace('.', '/')
        path = f'{fullname_as_path}/{name}'
        try:
            self.zipimporter.get_data(path)
        except OSError:
            return False
        return True

    def contents(self):
        """Yield the name of each direct child of the package, once each.

        Files directly inside the package are yielded by name.  Anything
        nested deeper is represented by the top-level subdirectory that
        contains it.
        """
        # This is a bit convoluted, because fullname will be a module path,
        # but _files is a list of file names relative to the top of the
        # archive's namespace.  We want to compare file paths to find all the
        # names of things inside the module represented by fullname.  So we
        # turn the module path of fullname into a file path relative to the
        # top of the archive, and then we iterate through _files looking for
        # names inside that "directory".
        from pathlib import Path
        fullname_path = Path(self.zipimporter.get_filename(self.fullname))
        relative_path = fullname_path.relative_to(self.zipimporter.archive)
        # Don't forget that fullname names a package, so its path will include
        # __init__.py, which we want to ignore.
        assert relative_path.name == '__init__.py'
        package_path = relative_path.parent
        seen = set()
        for filename in self.zipimporter._files:
            try:
                relative = Path(filename).relative_to(package_path)
            except ValueError:
                # Not located inside this package.
                continue
            parts = relative.parts
            if not parts:
                # An entry for the package directory itself has no name
                # relative to the package; it is not one of its contents.
                continue
            # The first component is always the direct child of the
            # package, however deeply the file is nested.  Using the
            # immediate parent's name instead would report an inner
            # directory (e.g. 'deep' for 'top/deep/file') as if it were a
            # child of the package.
            entry = parts[0]
            if entry not in seen:
                seen.add(entry)
                yield entry

View File

@ -0,0 +1 @@
Built-in loaders (SourceFileLoader and ZipImporter) now supply ``TraversableResources`` implementations for ``ResourceReader``, and the fallback function has been removed.

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff