bpo-32248 - Implement `ResourceReader` and `get_resource_reader()` for zipimport (#5248)
This commit is contained in:
parent
789e359f51
commit
6f6eb35f9b
|
@ -328,8 +328,12 @@ importlib.resources
|
||||||
This module provides several new APIs and one new ABC for access to, opening,
|
This module provides several new APIs and one new ABC for access to, opening,
|
||||||
and reading *resources* inside packages. Resources are roughly akin to files
|
and reading *resources* inside packages. Resources are roughly akin to files
|
||||||
inside of packages, but they needn't be actual files on the physical file
|
inside of packages, but they needn't be actual files on the physical file
|
||||||
system. Module loaders can provide :class:`importlib.abc.ResourceReader`
|
system. Module loaders can provide a :meth:`get_resource_reader()` function
|
||||||
implementations to support this new module's API.
|
which returns a :class:`importlib.abc.ResourceReader` instance to support this
|
||||||
|
new API. Built-in file path loaders and zip file loaders both support this.
|
||||||
|
(see the PyPI package
|
||||||
|
`importlib_resources <http://importlib-resources.readthedocs.io/en/latest/>`_
|
||||||
|
as a compatible back port for older Python versions).
|
||||||
|
|
||||||
|
|
||||||
Improved Modules
|
Improved Modules
|
||||||
|
|
|
@ -12,7 +12,7 @@ from types import ModuleType
|
||||||
from typing import Iterator, Optional, Set, Union # noqa: F401
|
from typing import Iterator, Optional, Set, Union # noqa: F401
|
||||||
from typing import cast
|
from typing import cast
|
||||||
from typing.io import BinaryIO, TextIO
|
from typing.io import BinaryIO, TextIO
|
||||||
from zipfile import ZipFile
|
from zipimport import ZipImportError
|
||||||
|
|
||||||
|
|
||||||
Package = Union[str, ModuleType]
|
Package = Union[str, ModuleType]
|
||||||
|
@ -216,38 +216,7 @@ def is_resource(package: Package, name: str) -> bool:
|
||||||
# contents doesn't necessarily mean it's a resource. Directories are not
|
# contents doesn't necessarily mean it's a resource. Directories are not
|
||||||
# resources, so let's try to find out if it's a directory or not.
|
# resources, so let's try to find out if it's a directory or not.
|
||||||
path = Path(package.__spec__.origin).parent / name
|
path = Path(package.__spec__.origin).parent / name
|
||||||
if path.is_file():
|
return path.is_file()
|
||||||
return True
|
|
||||||
if path.is_dir():
|
|
||||||
return False
|
|
||||||
# If it's not a file and it's not a directory, what is it? Well, this
|
|
||||||
# means the file doesn't exist on the file system, so it probably lives
|
|
||||||
# inside a zip file. We have to crack open the zip, look at its table of
|
|
||||||
# contents, and make sure that this entry doesn't have sub-entries.
|
|
||||||
archive_path = package.__spec__.loader.archive # type: ignore
|
|
||||||
package_directory = Path(package.__spec__.origin).parent
|
|
||||||
with ZipFile(archive_path) as zf:
|
|
||||||
toc = zf.namelist()
|
|
||||||
relpath = package_directory.relative_to(archive_path)
|
|
||||||
candidate_path = relpath / name
|
|
||||||
for entry in toc:
|
|
||||||
try:
|
|
||||||
relative_to_candidate = Path(entry).relative_to(candidate_path)
|
|
||||||
except ValueError:
|
|
||||||
# The two paths aren't relative to each other so we can ignore it.
|
|
||||||
continue
|
|
||||||
# Since directories aren't explicitly listed in the zip file, we must
|
|
||||||
# infer their 'directory-ness' by looking at the number of path
|
|
||||||
# components in the path relative to the package resource we're
|
|
||||||
# looking up. If there are zero additional parts, it's a file, i.e. a
|
|
||||||
# resource. If there are more than zero it's a directory, i.e. not a
|
|
||||||
# resource. It has to be one of these two cases.
|
|
||||||
return len(relative_to_candidate.parts) == 0
|
|
||||||
# I think it's impossible to get here. It would mean that we are looking
|
|
||||||
# for a resource in a zip file, there's an entry matching it in the return
|
|
||||||
# value of contents(), but we never actually found it in the zip's table of
|
|
||||||
# contents.
|
|
||||||
raise AssertionError('Impossible situation')
|
|
||||||
|
|
||||||
|
|
||||||
def contents(package: Package) -> Iterator[str]:
|
def contents(package: Package) -> Iterator[str]:
|
||||||
|
@ -268,38 +237,85 @@ def contents(package: Package) -> Iterator[str]:
|
||||||
not package.__spec__.has_location):
|
not package.__spec__.has_location):
|
||||||
return []
|
return []
|
||||||
package_directory = Path(package.__spec__.origin).parent
|
package_directory = Path(package.__spec__.origin).parent
|
||||||
try:
|
|
||||||
yield from os.listdir(str(package_directory))
|
yield from os.listdir(str(package_directory))
|
||||||
except (NotADirectoryError, FileNotFoundError):
|
|
||||||
# The package is probably in a zip file.
|
|
||||||
archive_path = getattr(package.__spec__.loader, 'archive', None)
|
# Private implementation of ResourceReader and get_resource_reader() for
|
||||||
if archive_path is None:
|
# zipimport. Don't use these directly! We're implementing these in Python
|
||||||
raise
|
# because 1) it's easier, 2) zipimport will likely get rewritten in Python
|
||||||
relpath = package_directory.relative_to(archive_path)
|
# itself at some point, so doing this all in C would just be a waste of
|
||||||
with ZipFile(archive_path) as zf:
|
# effort.
|
||||||
toc = zf.namelist()
|
|
||||||
subdirs_seen = set() # type: Set
|
class _ZipImportResourceReader(resources_abc.ResourceReader):
|
||||||
for filename in toc:
|
"""Private class used to support ZipImport.get_resource_reader().
|
||||||
path = Path(filename)
|
|
||||||
# Strip off any path component parts that are in common with the
|
This class is allowed to reference all the innards and private parts of
|
||||||
# package directory, relative to the zip archive's file system
|
the zipimporter.
|
||||||
# path. This gives us all the parts that live under the named
|
"""
|
||||||
# package inside the zip file. If the length of these subparts is
|
|
||||||
# exactly 1, then it is situated inside the package. The resulting
|
def __init__(self, zipimporter, fullname):
|
||||||
# length will be 0 if it's above the package, and it will be
|
self.zipimporter = zipimporter
|
||||||
# greater than 1 if it lives in a subdirectory of the package
|
self.fullname = fullname
|
||||||
# directory.
|
|
||||||
#
|
def open_resource(self, resource):
|
||||||
# However, since directories themselves don't appear in the zip
|
path = f'{self.fullname}/{resource}'
|
||||||
# archive as a separate entry, we need to return the first path
|
try:
|
||||||
# component for any case that has > 1 subparts -- but only once!
|
return BytesIO(self.zipimporter.get_data(path))
|
||||||
if path.parts[:len(relpath.parts)] != relpath.parts:
|
except OSError:
|
||||||
|
raise FileNotFoundError
|
||||||
|
|
||||||
|
def resource_path(self, resource):
|
||||||
|
# All resources are in the zip file, so there is no path to the file.
|
||||||
|
# Raising FileNotFoundError tells the higher level API to extract the
|
||||||
|
# binary data and create a temporary file.
|
||||||
|
raise FileNotFoundError
|
||||||
|
|
||||||
|
def is_resource(self, name):
|
||||||
|
# Maybe we could do better, but if we can get the data, it's a
|
||||||
|
# resource. Otherwise it isn't.
|
||||||
|
path = f'{self.fullname}/{name}'
|
||||||
|
try:
|
||||||
|
self.zipimporter.get_data(path)
|
||||||
|
except OSError:
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
|
||||||
|
def contents(self):
|
||||||
|
# This is a bit convoluted, because fullname will be a module path,
|
||||||
|
# but _files is a list of file names relative to the top of the
|
||||||
|
# archive's namespace. We want to compare file paths to find all the
|
||||||
|
# names of things inside the module represented by fullname. So we
|
||||||
|
# turn the module path of fullname into a file path relative to the
|
||||||
|
# top of the archive, and then we iterate through _files looking for
|
||||||
|
# names inside that "directory".
|
||||||
|
fullname_path = Path(self.zipimporter.get_filename(self.fullname))
|
||||||
|
relative_path = fullname_path.relative_to(self.zipimporter.archive)
|
||||||
|
# Don't forget that fullname names a package, so its path will include
|
||||||
|
# __init__.py, which we want to ignore.
|
||||||
|
assert relative_path.name == '__init__.py'
|
||||||
|
package_path = relative_path.parent
|
||||||
|
subdirs_seen = set()
|
||||||
|
for filename in self.zipimporter._files:
|
||||||
|
try:
|
||||||
|
relative = Path(filename).relative_to(package_path)
|
||||||
|
except ValueError:
|
||||||
continue
|
continue
|
||||||
subparts = path.parts[len(relpath.parts):]
|
# If the path of the file (which is relative to the top of the zip
|
||||||
if len(subparts) == 1:
|
# namespace), relative to the package given when the resource
|
||||||
yield subparts[0]
|
# reader was created, has a parent, then it's a name in a
|
||||||
elif len(subparts) > 1:
|
# subdirectory: skip the file itself, but yield its directory name once.
|
||||||
subdir = subparts[0]
|
parent_name = relative.parent.name
|
||||||
if subdir not in subdirs_seen:
|
if len(parent_name) == 0:
|
||||||
subdirs_seen.add(subdir)
|
yield relative.name
|
||||||
yield subdir
|
elif parent_name not in subdirs_seen:
|
||||||
|
subdirs_seen.add(parent_name)
|
||||||
|
yield parent_name
|
||||||
|
|
||||||
|
|
||||||
|
def _zipimport_get_resource_reader(zipimporter, fullname):
|
||||||
|
try:
|
||||||
|
if not zipimporter.is_package(fullname):
|
||||||
|
return None
|
||||||
|
except ZipImportError:
|
||||||
|
return None
|
||||||
|
return _ZipImportResourceReader(zipimporter, fullname)
|
||||||
|
|
|
@ -1,13 +1,6 @@
|
||||||
Add :class:`importlib.abc.ResourceReader` as an ABC to provide a
|
Add :mod:`importlib.resources` and :class:`importlib.abc.ResourceReader` as
|
||||||
unified API for reading resources contained within packages. Loaders
|
the unified API for reading resources contained within packages. Loaders
|
||||||
wishing to support resource reading are expected to implement the
|
wishing to support resource reading must implement the
|
||||||
``get_resource_reader(fullname)`` method.
|
:meth:`get_resource_reader()` method. File-based and zipimport-based loaders
|
||||||
|
both implement these APIs. :class:`importlib.abc.ResourceLoader` is
|
||||||
Also add :mod:`importlib.resources` as the stdlib port of the
|
deprecated in favor of these new APIs.
|
||||||
``importlib_resources`` PyPI package. The module provides a high-level
|
|
||||||
API for end-users to read resources in a nicer fashion than having to
|
|
||||||
directly interact with low-level details such as loaders.
|
|
||||||
|
|
||||||
Thanks to this work, :class:`importlib.abc.ResourceLoader` has now
|
|
||||||
been documented as deprecated due to its under-specified nature and
|
|
||||||
lack of features as provided by :class:`importlib.abc.ResourceReader`.
|
|
||||||
|
|
|
@ -291,4 +291,35 @@ zipimport_zipimporter_get_source(ZipImporter *self, PyObject *arg)
|
||||||
exit:
|
exit:
|
||||||
return return_value;
|
return return_value;
|
||||||
}
|
}
|
||||||
/*[clinic end generated code: output=93cb62a3a9752b9f input=a9049054013a1b77]*/
|
|
||||||
|
PyDoc_STRVAR(zipimport_zipimporter_get_resource_reader__doc__,
|
||||||
|
"get_resource_reader($self, fullname, /)\n"
|
||||||
|
"--\n"
|
||||||
|
"\n"
|
||||||
|
"Return the ResourceReader for a package in a zip file.\n"
|
||||||
|
"\n"
|
||||||
|
"If \'fullname\' is a package within the zip file, return the \'ResourceReader\'\n"
|
||||||
|
"object for the package. Otherwise return None.");
|
||||||
|
|
||||||
|
#define ZIPIMPORT_ZIPIMPORTER_GET_RESOURCE_READER_METHODDEF \
|
||||||
|
{"get_resource_reader", (PyCFunction)zipimport_zipimporter_get_resource_reader, METH_O, zipimport_zipimporter_get_resource_reader__doc__},
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
zipimport_zipimporter_get_resource_reader_impl(ZipImporter *self,
|
||||||
|
PyObject *fullname);
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
zipimport_zipimporter_get_resource_reader(ZipImporter *self, PyObject *arg)
|
||||||
|
{
|
||||||
|
PyObject *return_value = NULL;
|
||||||
|
PyObject *fullname;
|
||||||
|
|
||||||
|
if (!PyArg_Parse(arg, "U:get_resource_reader", &fullname)) {
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
return_value = zipimport_zipimporter_get_resource_reader_impl(self, fullname);
|
||||||
|
|
||||||
|
exit:
|
||||||
|
return return_value;
|
||||||
|
}
|
||||||
|
/*[clinic end generated code: output=0b57adfe21373512 input=a9049054013a1b77]*/
|
||||||
|
|
|
@ -784,6 +784,35 @@ zipimport_zipimporter_get_source_impl(ZipImporter *self, PyObject *fullname)
|
||||||
Py_RETURN_NONE;
|
Py_RETURN_NONE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*[clinic input]
|
||||||
|
zipimport.zipimporter.get_resource_reader
|
||||||
|
|
||||||
|
fullname: unicode
|
||||||
|
/
|
||||||
|
|
||||||
|
Return the ResourceReader for a package in a zip file.
|
||||||
|
|
||||||
|
If 'fullname' is a package within the zip file, return the 'ResourceReader'
|
||||||
|
object for the package. Otherwise return None.
|
||||||
|
|
||||||
|
[clinic start generated code]*/
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
zipimport_zipimporter_get_resource_reader_impl(ZipImporter *self,
|
||||||
|
PyObject *fullname)
|
||||||
|
/*[clinic end generated code: output=5e367d431f830726 input=bfab94d736e99151]*/
|
||||||
|
{
|
||||||
|
PyObject *module = PyImport_ImportModule("importlib.resources");
|
||||||
|
if (module == NULL) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
PyObject *retval = PyObject_CallMethod(
|
||||||
|
module, "_zipimport_get_resource_reader",
|
||||||
|
"OO", (PyObject *)self, fullname);
|
||||||
|
Py_DECREF(module);
|
||||||
|
return retval;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static PyMethodDef zipimporter_methods[] = {
|
static PyMethodDef zipimporter_methods[] = {
|
||||||
ZIPIMPORT_ZIPIMPORTER_FIND_MODULE_METHODDEF
|
ZIPIMPORT_ZIPIMPORTER_FIND_MODULE_METHODDEF
|
||||||
|
@ -794,6 +823,7 @@ static PyMethodDef zipimporter_methods[] = {
|
||||||
ZIPIMPORT_ZIPIMPORTER_GET_DATA_METHODDEF
|
ZIPIMPORT_ZIPIMPORTER_GET_DATA_METHODDEF
|
||||||
ZIPIMPORT_ZIPIMPORTER_GET_CODE_METHODDEF
|
ZIPIMPORT_ZIPIMPORTER_GET_CODE_METHODDEF
|
||||||
ZIPIMPORT_ZIPIMPORTER_GET_SOURCE_METHODDEF
|
ZIPIMPORT_ZIPIMPORTER_GET_SOURCE_METHODDEF
|
||||||
|
ZIPIMPORT_ZIPIMPORTER_GET_RESOURCE_READER_METHODDEF
|
||||||
{NULL, NULL} /* sentinel */
|
{NULL, NULL} /* sentinel */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue