bpo-32248 - Implement `ResourceReader` and `get_resource_reader()` for zipimport (#5248)

2018-01-24 15:36:21 -05:00 · 2018-01-24 15:36:21 -05:00 · 6f6eb35f9b
parent 789e359f51
commit 6f6eb35f9b
5 changed files with 157 additions and 83 deletions
--- a/Doc/whatsnew/3.7.rst
+++ b/Doc/whatsnew/3.7.rst
@ -328,8 +328,12 @@ importlib.resources
 This module provides several new APIs and one new ABC for access to, opening,
 and reading *resources* inside packages.  Resources are roughly akin to files
 inside of packages, but they needn't be actual files on the physical file
-system.  Module loaders can provide :class:`importlib.abc.ResourceReader`
+system.  Module loaders can provide a :meth:`get_resource_reader()` function
-implementations to support this new module's API.
+which returns a :class:`importlib.abc.ResourceReader` instance to support this
 new API.  Built-in file path loaders and zip file loaders both support this.
 (see the PyPI package
 `importlib_resources <http://importlib-resources.readthedocs.io/en/latest/>`_
 as a compatible back port for older Python versions).
 Improved Modules
--- a/Lib/importlib/resources.py
+++ b/Lib/importlib/resources.py
@ -12,7 +12,7 @@ from types import ModuleType
 from typing import Iterator, Optional, Set, Union   # noqa: F401
 from typing import cast
 from typing.io import BinaryIO, TextIO
-from zipfile import ZipFile
+from zipimport import ZipImportError
 Package = Union[str, ModuleType]
@ -216,38 +216,7 @@ def is_resource(package: Package, name: str) -> bool:
    # contents doesn't necessarily mean it's a resource.  Directories are not
    # resources, so let's try to find out if it's a directory or not.
    path = Path(package.__spec__.origin).parent / name
-    if path.is_file():
+    return path.is_file()
        return True
    if path.is_dir():
        return False
    # If it's not a file and it's not a directory, what is it?  Well, this
    # means the file doesn't exist on the file system, so it probably lives
    # inside a zip file.  We have to crack open the zip, look at its table of
    # contents, and make sure that this entry doesn't have sub-entries.
    archive_path = package.__spec__.loader.archive   # type: ignore
    package_directory = Path(package.__spec__.origin).parent
    with ZipFile(archive_path) as zf:
        toc = zf.namelist()
    relpath = package_directory.relative_to(archive_path)
    candidate_path = relpath / name
    for entry in toc:
        try:
            relative_to_candidate = Path(entry).relative_to(candidate_path)
        except ValueError:
            # The two paths aren't relative to each other so we can ignore it.
            continue
        # Since directories aren't explicitly listed in the zip file, we must
        # infer their 'directory-ness' by looking at the number of path
        # components in the path relative to the package resource we're
        # looking up.  If there are zero additional parts, it's a file, i.e. a
        # resource.  If there are more than zero it's a directory, i.e. not a
        # resource.  It has to be one of these two cases.
        return len(relative_to_candidate.parts) == 0
    # I think it's impossible to get here.  It would mean that we are looking
    # for a resource in a zip file, there's an entry matching it in the return
    # value of contents(), but we never actually found it in the zip's table of
    # contents.
    raise AssertionError('Impossible situation')
 def contents(package: Package) -> Iterator[str]:
@ -268,38 +237,85 @@ def contents(package: Package) -> Iterator[str]:
            not package.__spec__.has_location):
        return []
    package_directory = Path(package.__spec__.origin).parent
    try:
    yield from os.listdir(str(package_directory))
-    except (NotADirectoryError, FileNotFoundError):
+
-        # The package is probably in a zip file.
+
-        archive_path = getattr(package.__spec__.loader, 'archive', None)
+# Private implementation of ResourceReader and get_resource_reader() for
-        if archive_path is None:
+# zipimport.  Don't use these directly!  We're implementing these in Python
-            raise
+# because 1) it's easier, 2) zipimport will likely get rewritten in Python
-        relpath = package_directory.relative_to(archive_path)
+# itself at some point, so doing this all in C would just be a waste of
-        with ZipFile(archive_path) as zf:
+# effort.
-            toc = zf.namelist()
+
-        subdirs_seen = set()                        # type: Set
+class _ZipImportResourceReader(resources_abc.ResourceReader):
-        for filename in toc:
+    """Private class used to support ZipImport.get_resource_reader().
-            path = Path(filename)
+
-            # Strip off any path component parts that are in common with the
+    This class is allowed to reference all the innards and private parts of
-            # package directory, relative to the zip archive's file system
+    the zipimporter.
-            # path.  This gives us all the parts that live under the named
+    """
-            # package inside the zip file.  If the length of these subparts is
+
-            # exactly 1, then it is situated inside the package.  The resulting
+    def __init__(self, zipimporter, fullname):
-            # length will be 0 if it's above the package, and it will be
+        self.zipimporter = zipimporter
-            # greater than 1 if it lives in a subdirectory of the package
+        self.fullname = fullname
-            # directory.
+
-            #
+    def open_resource(self, resource):
-            # However, since directories themselves don't appear in the zip
+        path = f'{self.fullname}/{resource}'
-            # archive as a separate entry, we need to return the first path
+        try:
-            # component for any case that has > 1 subparts -- but only once!
+            return BytesIO(self.zipimporter.get_data(path))
-            if path.parts[:len(relpath.parts)] != relpath.parts:
+        except OSError:
            raise FileNotFoundError
    def resource_path(self, resource):
        """Report that *resource* has no path on the file system.

        All resources are in the zip file, so there is never a real file to
        point at.  Raising FileNotFoundError tells the higher level API to
        extract the binary data and create a temporary file instead.
        """
        raise FileNotFoundError
    def is_resource(self, name):
        """Return True if *name* is readable data inside this package.

        Maybe we could do better, but if the zipimporter can hand back the
        data for the entry, it's a resource.  If get_data() raises OSError,
        it isn't.
        """
        # fullname is a dotted module path, while entries in the archive are
        # addressed by file path.  Translate the dots before joining;
        # otherwise a sub-package such as 'pkg.sub' is looked up as
        # 'pkg.sub/<name>' and nothing inside it is ever found.
        package_path = self.fullname.replace('.', '/')
        path = f'{package_path}/{name}'
        try:
            self.zipimporter.get_data(path)
        except OSError:
            return False
        return True
    def contents(self):
        # This is a bit convoluted, because fullname will be a module path,
        # but _files is a list of file names relative to the top of the
        # archive's namespace.  We want to compare file paths to find all the
        # names of things inside the module represented by fullname.  So we
        # turn the module path of fullname into a file path relative to the
        # top of the archive, and then we iterate through _files looking for
        # names inside that "directory".
        fullname_path = Path(self.zipimporter.get_filename(self.fullname))
        relative_path = fullname_path.relative_to(self.zipimporter.archive)
        # Don't forget that fullname names a package, so its path will include
        # __init__.py, which we want to ignore.
        assert relative_path.name == '__init__.py'
        package_path = relative_path.parent
        subdirs_seen = set()
        for filename in self.zipimporter._files:
            try:
                relative = Path(filename).relative_to(package_path)
            except ValueError:
                continue
-            subparts = path.parts[len(relpath.parts):]
+            # If the path of the file (which is relative to the top of the zip
-            if len(subparts) == 1:
+            # namespace), relative to the package given when the resource
-                yield subparts[0]
+            # reader was created, has a parent, then it's a name in a
-            elif len(subparts) > 1:
+            # subdirectory and thus we skip it.
-                subdir = subparts[0]
+            parent_name = relative.parent.name
-                if subdir not in subdirs_seen:
+            if len(parent_name) == 0:
-                    subdirs_seen.add(subdir)
+                yield relative.name
-                    yield subdir
+            elif parent_name not in subdirs_seen:
                subdirs_seen.add(parent_name)
                yield parent_name
 def _zipimport_get_resource_reader(zipimporter, fullname):
    try:
        if not zipimporter.is_package(fullname):
            return None
    except ZipImportError:
        return None
    return _ZipImportResourceReader(zipimporter, fullname)
--- a/Misc/NEWS.d/next/Library/2017-12-15-15-34-12.bpo-32248.zmO8G2.rst
+++ b/Misc/NEWS.d/next/Library/2017-12-15-15-34-12.bpo-32248.zmO8G2.rst
@ -1,13 +1,6 @@
-Add :class:`importlib.abc.ResourceReader` as an ABC to provide a
+Add :mod:`importlib.resources` and :class:`importlib.abc.ResourceReader` as
-unified API for reading resources contained within packages. Loaders
+the unified API for reading resources contained within packages.  Loaders
-wishing to support resource reading are expected to implement the
+wishing to support resource reading must implement the
-``get_resource_reader(fullname)`` method.
+:meth:`get_resource_reader()` method.  File-based and zipimport-based loaders
-
+both implement these APIs.  :class:`importlib.abc.ResourceLoader` is
-Also add :mod:`importlib.resources` as the stdlib port of the
+deprecated in favor of these new APIs.
 ``importlib_resources`` PyPI package. The module provides a high-level
 API for end-users to read resources in a nicer fashion than having to
 directly interact with low-level details such as loaders.
 Thanks to this work, :class:`importlib.abc.ResourceLoader` has now
 been documented as deprecated due to its under-specified nature and
 lack of features as provided by :class:`importlib.abc.ResourceReader`.
--- a/Modules/clinic/zipimport.c.h
+++ b/Modules/clinic/zipimport.c.h
@ -291,4 +291,35 @@ zipimport_zipimporter_get_source(ZipImporter *self, PyObject *arg)
 exit:
    return return_value;
 }
-/*[clinic end generated code: output=93cb62a3a9752b9f input=a9049054013a1b77]*/
+
 PyDoc_STRVAR(zipimport_zipimporter_get_resource_reader__doc__,
 "get_resource_reader($self, fullname, /)\n"
 "--\n"
 "\n"
 "Return the ResourceReader for a package in a zip file.\n"
 "\n"
 "If \'fullname\' is a package within the zip file, return the \'ResourceReader\'\n"
 "object for the package.  Otherwise return None.");
 #define ZIPIMPORT_ZIPIMPORTER_GET_RESOURCE_READER_METHODDEF    \
    {"get_resource_reader", (PyCFunction)zipimport_zipimporter_get_resource_reader, METH_O, zipimport_zipimporter_get_resource_reader__doc__},
 static PyObject *
 zipimport_zipimporter_get_resource_reader_impl(ZipImporter *self,
                                               PyObject *fullname);
 static PyObject *
 zipimport_zipimporter_get_resource_reader(ZipImporter *self, PyObject *arg)
 {
    PyObject *return_value = NULL;
    PyObject *fullname;
    if (!PyArg_Parse(arg, "U:get_resource_reader", &fullname)) {
        goto exit;
    }
    return_value = zipimport_zipimporter_get_resource_reader_impl(self, fullname);
 exit:
    return return_value;
 }
 /*[clinic end generated code: output=0b57adfe21373512 input=a9049054013a1b77]*/
--- a/Modules/zipimport.c
+++ b/Modules/zipimport.c
@ -784,6 +784,35 @@ zipimport_zipimporter_get_source_impl(ZipImporter *self, PyObject *fullname)
    Py_RETURN_NONE;
 }
 /*[clinic input]
 zipimport.zipimporter.get_resource_reader
    fullname: unicode
    /
 Return the ResourceReader for a package in a zip file.
 If 'fullname' is a package within the zip file, return the 'ResourceReader'
 object for the package.  Otherwise return None.
 [clinic start generated code]*/
 static PyObject *
 zipimport_zipimporter_get_resource_reader_impl(ZipImporter *self,
                                               PyObject *fullname)
 /*[clinic end generated code: output=5e367d431f830726 input=bfab94d736e99151]*/
 {
    /* The reader is implemented in Python: import importlib.resources and
       delegate to its private _zipimport_get_resource_reader() helper,
       handing it this zipimporter and the requested name. */
    PyObject *module = PyImport_ImportModule("importlib.resources");
    if (module == NULL) {
        /* Import failed; propagate the failure to the caller. */
        return NULL;
    }
    PyObject *retval = PyObject_CallMethod(
        module, "_zipimport_get_resource_reader",
        "OO", (PyObject *)self, fullname);
    /* Release the module reference obtained above.  retval is handed back
       unchanged, whether it is the helper's result or NULL. */
    Py_DECREF(module);
    return retval;
 }
 static PyMethodDef zipimporter_methods[] = {
    ZIPIMPORT_ZIPIMPORTER_FIND_MODULE_METHODDEF
@ -794,6 +823,7 @@ static PyMethodDef zipimporter_methods[] = {
    ZIPIMPORT_ZIPIMPORTER_GET_DATA_METHODDEF
    ZIPIMPORT_ZIPIMPORTER_GET_CODE_METHODDEF
    ZIPIMPORT_ZIPIMPORTER_GET_SOURCE_METHODDEF
    ZIPIMPORT_ZIPIMPORTER_GET_RESOURCE_READER_METHODDEF
    {NULL,              NULL}   /* sentinel */
 };