bpo-32248 - Implement `ResourceReader` and `get_resource_reader()` for zipimport (#5248)

This commit is contained in:
Barry Warsaw 2018-01-24 15:36:21 -05:00 committed by GitHub
parent 789e359f51
commit 6f6eb35f9b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 157 additions and 83 deletions

View File

@ -328,8 +328,12 @@ importlib.resources
This module provides several new APIs and one new ABC for access to, opening, This module provides several new APIs and one new ABC for access to, opening,
and reading *resources* inside packages. Resources are roughly akin to files and reading *resources* inside packages. Resources are roughly akin to files
inside of packages, but they needn't be actual files on the physical file inside of packages, but they needn't be actual files on the physical file
system. Module loaders can provide :class:`importlib.abc.ResourceReader` system. Module loaders can provide a :meth:`get_resource_reader()` function
implementations to support this new module's API. which returns an :class:`importlib.abc.ResourceReader` instance to support this
new API. Built-in file path loaders and zip file loaders both support this.
(See the PyPI package
`importlib_resources <http://importlib-resources.readthedocs.io/en/latest/>`_
for a compatible backport to older Python versions.)
Improved Modules Improved Modules

View File

@ -12,7 +12,7 @@ from types import ModuleType
from typing import Iterator, Optional, Set, Union # noqa: F401 from typing import Iterator, Optional, Set, Union # noqa: F401
from typing import cast from typing import cast
from typing.io import BinaryIO, TextIO from typing.io import BinaryIO, TextIO
from zipfile import ZipFile from zipimport import ZipImportError
Package = Union[str, ModuleType] Package = Union[str, ModuleType]
@ -216,38 +216,7 @@ def is_resource(package: Package, name: str) -> bool:
# contents doesn't necessarily mean it's a resource. Directories are not # contents doesn't necessarily mean it's a resource. Directories are not
# resources, so let's try to find out if it's a directory or not. # resources, so let's try to find out if it's a directory or not.
path = Path(package.__spec__.origin).parent / name path = Path(package.__spec__.origin).parent / name
if path.is_file(): return path.is_file()
return True
if path.is_dir():
return False
# If it's not a file and it's not a directory, what is it? Well, this
# means the file doesn't exist on the file system, so it probably lives
# inside a zip file. We have to crack open the zip, look at its table of
# contents, and make sure that this entry doesn't have sub-entries.
archive_path = package.__spec__.loader.archive # type: ignore
package_directory = Path(package.__spec__.origin).parent
with ZipFile(archive_path) as zf:
toc = zf.namelist()
relpath = package_directory.relative_to(archive_path)
candidate_path = relpath / name
for entry in toc:
try:
relative_to_candidate = Path(entry).relative_to(candidate_path)
except ValueError:
# The two paths aren't relative to each other so we can ignore it.
continue
# Since directories aren't explicitly listed in the zip file, we must
# infer their 'directory-ness' by looking at the number of path
# components in the path relative to the package resource we're
# looking up. If there are zero additional parts, it's a file, i.e. a
# resource. If there are more than zero it's a directory, i.e. not a
# resource. It has to be one of these two cases.
return len(relative_to_candidate.parts) == 0
# I think it's impossible to get here. It would mean that we are looking
# for a resource in a zip file, there's an entry matching it in the return
# value of contents(), but we never actually found it in the zip's table of
# contents.
raise AssertionError('Impossible situation')
def contents(package: Package) -> Iterator[str]: def contents(package: Package) -> Iterator[str]:
@ -268,38 +237,85 @@ def contents(package: Package) -> Iterator[str]:
not package.__spec__.has_location): not package.__spec__.has_location):
return [] return []
package_directory = Path(package.__spec__.origin).parent package_directory = Path(package.__spec__.origin).parent
try: yield from os.listdir(str(package_directory))
yield from os.listdir(str(package_directory))
except (NotADirectoryError, FileNotFoundError):
# The package is probably in a zip file. # Private implementation of ResourceReader and get_resource_reader() for
archive_path = getattr(package.__spec__.loader, 'archive', None) # zipimport. Don't use these directly! We're implementing these in Python
if archive_path is None: # because 1) it's easier, 2) zipimport will likely get rewritten in Python
raise # itself at some point, so doing this all in C would just be a waste of
relpath = package_directory.relative_to(archive_path) # effort.
with ZipFile(archive_path) as zf:
toc = zf.namelist() class _ZipImportResourceReader(resources_abc.ResourceReader):
subdirs_seen = set() # type: Set """Private class used to support ZipImport.get_resource_reader().
for filename in toc:
path = Path(filename) This class is allowed to reference all the innards and private parts of
# Strip off any path component parts that are in common with the the zipimporter.
# package directory, relative to the zip archive's file system """
# path. This gives us all the parts that live under the named
# package inside the zip file. If the length of these subparts is def __init__(self, zipimporter, fullname):
# exactly 1, then it is situated inside the package. The resulting self.zipimporter = zipimporter
# length will be 0 if it's above the package, and it will be self.fullname = fullname
# greater than 1 if it lives in a subdirectory of the package
# directory. def open_resource(self, resource):
# path = f'{self.fullname}/{resource}'
# However, since directories themselves don't appear in the zip try:
# archive as a separate entry, we need to return the first path return BytesIO(self.zipimporter.get_data(path))
# component for any case that has > 1 subparts -- but only once! except OSError:
if path.parts[:len(relpath.parts)] != relpath.parts: raise FileNotFoundError
def resource_path(self, resource):
    """Signal that *resource* has no path on the file system.

    Every resource served by this reader lives inside the zip archive, so
    there is never a concrete file to point at.  Raising FileNotFoundError
    tells the higher level API to extract the binary data and create a
    temporary file instead.

    Raises:
        FileNotFoundError: always, regardless of *resource*.
    """
    raise FileNotFoundError()
def is_resource(self, name):
    """Return True if *name* is a readable resource in this package.

    Maybe we could do better, but if we can get the data out of the
    archive, it's a resource.  Otherwise it isn't.

    Args:
        name: File name of the candidate resource, relative to the package.

    Returns:
        True when the zipimporter can return data for the entry, False
        when looking it up raises OSError.
    """
    # fullname is a dotted module name ('pkg.sub'), whereas entries in the
    # archive are addressed by '/'-separated paths ('pkg/sub/...'), so the
    # dots have to be converted before building the path to look up.
    package_path = self.fullname.replace('.', '/')
    path = f'{package_path}/{name}'
    try:
        self.zipimporter.get_data(path)
    except OSError:
        return False
    return True
def contents(self):
    """Yield the names of the entries directly inside the package.

    fullname is a module path, but the zipimporter's _files holds file
    names relative to the top of the archive's namespace.  So the module
    path is first turned into a directory path relative to the top of the
    archive, and then _files is scanned for names inside that "directory".

    Files located directly in the package are yielded by name.  Anything
    located deeper is represented by the name of the package's immediate
    subdirectory that contains it, and each name is yielded only once.

    Yields:
        str: the name of a file or subdirectory in the package.
    """
    init_path = Path(self.zipimporter.get_filename(self.fullname))
    # fullname names a package, so its file name is the package's __init__
    # module; the parent of that file is the package directory.  The exact
    # file name is deliberately not asserted: an assert disappears under
    # -O, and presumably a bytecode-only package reports '__init__.pyc'.
    package_path = init_path.relative_to(self.zipimporter.archive).parent
    seen = set()
    for filename in self.zipimporter._files:
        try:
            relative = Path(filename).relative_to(package_path)
        except ValueError:
            # Not located under the package directory at all.
            continue
        if not relative.parts:
            # An entry for the package directory itself has no name to
            # report.
            continue
        # The first component is the direct child of the package: the file
        # itself, or the top-most subdirectory for more deeply nested
        # entries (the last parent component would be the wrong name for
        # anything nested two or more levels down).
        name = relative.parts[0]
        if name not in seen:
            seen.add(name)
            yield name
def _zipimport_get_resource_reader(zipimporter, fullname):
    """Return a resource reader for *fullname*, or None if not a package.

    Args:
        zipimporter: The zipimporter asked for the reader.
        fullname: Module name to look up in the archive.

    Returns:
        A _ZipImportResourceReader when *fullname* is a package according
        to the zipimporter; None when it is not a package or when the
        lookup raises ZipImportError.
    """
    try:
        is_package = zipimporter.is_package(fullname)
    except ZipImportError:
        return None
    if is_package:
        return _ZipImportResourceReader(zipimporter, fullname)
    return None

View File

@ -1,13 +1,6 @@
Add :class:`importlib.abc.ResourceReader` as an ABC to provide a Add :mod:`importlib.resources` and :class:`importlib.abc.ResourceReader` as
unified API for reading resources contained within packages. Loaders the unified API for reading resources contained within packages. Loaders
wishing to support resource reading are expected to implement the wishing to support resource reading must implement the
``get_resource_reader(fullname)`` method. :meth:`get_resource_reader()` method. File-based and zipimport-based loaders
both implement these APIs. :class:`importlib.abc.ResourceLoader` is
Also add :mod:`importlib.resources` as the stdlib port of the deprecated in favor of these new APIs.
``importlib_resources`` PyPI package. The module provides a high-level
API for end-users to read resources in a nicer fashion than having to
directly interact with low-level details such as loaders.
Thanks to this work, :class:`importlib.abc.ResourceLoader` has now
been documented as deprecated due to its under-specified nature and
lack of features as provided by :class:`importlib.abc.ResourceReader`.

View File

@ -291,4 +291,35 @@ zipimport_zipimporter_get_source(ZipImporter *self, PyObject *arg)
exit: exit:
return return_value; return return_value;
} }
/*[clinic end generated code: output=93cb62a3a9752b9f input=a9049054013a1b77]*/
PyDoc_STRVAR(zipimport_zipimporter_get_resource_reader__doc__,
"get_resource_reader($self, fullname, /)\n"
"--\n"
"\n"
"Return the ResourceReader for a package in a zip file.\n"
"\n"
"If \'fullname\' is a package within the zip file, return the \'ResourceReader\'\n"
"object for the package. Otherwise return None.");
#define ZIPIMPORT_ZIPIMPORTER_GET_RESOURCE_READER_METHODDEF \
{"get_resource_reader", (PyCFunction)zipimport_zipimporter_get_resource_reader, METH_O, zipimport_zipimporter_get_resource_reader__doc__},
static PyObject *
zipimport_zipimporter_get_resource_reader_impl(ZipImporter *self,
PyObject *fullname);
static PyObject *
zipimport_zipimporter_get_resource_reader(ZipImporter *self, PyObject *arg)
{
PyObject *return_value = NULL;
PyObject *fullname;
if (!PyArg_Parse(arg, "U:get_resource_reader", &fullname)) {
goto exit;
}
return_value = zipimport_zipimporter_get_resource_reader_impl(self, fullname);
exit:
return return_value;
}
/*[clinic end generated code: output=0b57adfe21373512 input=a9049054013a1b77]*/

View File

@ -784,6 +784,35 @@ zipimport_zipimporter_get_source_impl(ZipImporter *self, PyObject *fullname)
Py_RETURN_NONE; Py_RETURN_NONE;
} }
/*[clinic input]
zipimport.zipimporter.get_resource_reader
fullname: unicode
/
Return the ResourceReader for a package in a zip file.
If 'fullname' is a package within the zip file, return the 'ResourceReader'
object for the package. Otherwise return None.
[clinic start generated code]*/
static PyObject *
zipimport_zipimporter_get_resource_reader_impl(ZipImporter *self,
                                               PyObject *fullname)
/*[clinic end generated code: output=5e367d431f830726 input=bfab94d736e99151]*/
{
    PyObject *resources;
    PyObject *reader;

    /* The resource reader is implemented in Python, so import the module
       that provides it and delegate to its private factory function. */
    resources = PyImport_ImportModule("importlib.resources");
    if (!resources) {
        /* Import failed; hand the NULL back to the caller. */
        return NULL;
    }

    /* Call importlib.resources._zipimport_get_resource_reader(self, fullname).
       Its result is returned unchanged, including NULL on failure. */
    reader = PyObject_CallMethod(resources,
                                 "_zipimport_get_resource_reader",
                                 "OO", (PyObject *)self, fullname);

    /* The module reference is only needed for the duration of the call. */
    Py_DECREF(resources);
    return reader;
}
static PyMethodDef zipimporter_methods[] = { static PyMethodDef zipimporter_methods[] = {
ZIPIMPORT_ZIPIMPORTER_FIND_MODULE_METHODDEF ZIPIMPORT_ZIPIMPORTER_FIND_MODULE_METHODDEF
@ -794,6 +823,7 @@ static PyMethodDef zipimporter_methods[] = {
ZIPIMPORT_ZIPIMPORTER_GET_DATA_METHODDEF ZIPIMPORT_ZIPIMPORTER_GET_DATA_METHODDEF
ZIPIMPORT_ZIPIMPORTER_GET_CODE_METHODDEF ZIPIMPORT_ZIPIMPORTER_GET_CODE_METHODDEF
ZIPIMPORT_ZIPIMPORTER_GET_SOURCE_METHODDEF ZIPIMPORT_ZIPIMPORTER_GET_SOURCE_METHODDEF
ZIPIMPORT_ZIPIMPORTER_GET_RESOURCE_READER_METHODDEF
{NULL, NULL} /* sentinel */ {NULL, NULL} /* sentinel */
}; };