diff --git a/Doc/library/zipfile.rst b/Doc/library/zipfile.rst index 4e9edff2701..9db9697105d 100644 --- a/Doc/library/zipfile.rst +++ b/Doc/library/zipfile.rst @@ -52,6 +52,15 @@ The module defines the following items: :ref:`zipfile-objects` for constructor details. +.. class:: Path + :noindex: + + A pathlib-compatible wrapper for zip files. See section + :ref:`path-objects` for details. + + .. versionadded:: 3.8 + + .. class:: PyZipFile :noindex: @@ -456,6 +465,64 @@ The following data attributes are also available: truncated. +.. _path-objects: + +Path Objects +------------ + +.. class:: Path(root, at='') + + Construct a Path object from a ``root`` zipfile (which may be a + :class:`ZipFile` instance or ``file`` suitable for passing to + the :class:`ZipFile` constructor). + + ``at`` specifies the location of this Path within the zipfile, + e.g. 'dir/file.txt', 'dir/', or ''. Defaults to the empty string, + indicating the root. + +Path objects expose the following features of :mod:`pathlib.Path` +objects: + +Path objects are traversable using the ``/`` operator. + +.. attribute:: Path.name + + The final path component. + +.. method:: Path.open(*, **) + + Invoke :meth:`ZipFile.open` on the current path. Accepts + the same arguments as :meth:`ZipFile.open`. + +.. method:: Path.listdir() + + Enumerate the children of the current directory. + +.. method:: Path.is_dir() + + Return ``True`` if the current context references a directory. + +.. method:: Path.is_file() + + Return ``True`` if the current context references a file. + +.. method:: Path.exists() + + Return ``True`` if the current context references a file or + directory in the zip file. + +.. method:: Path.read_text(*, **) + + Read the current file as unicode text. Positional and + keyword arguments are passed through to + :class:`io.TextIOWrapper` (except ``buffer``, which is + implied by the context). + +.. method:: Path.read_bytes() + + Read the current file as bytes. + + .. _pyzipfile-objects: PyZipFile Objects diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py index 14e1e08c5bf..538d4ee55df 100644 --- a/Lib/test/test_zipfile.py +++ b/Lib/test/test_zipfile.py @@ -1,13 +1,15 @@ import contextlib +import importlib.util import io import os -import importlib.util import pathlib import posixpath -import time +import shutil import struct -import zipfile +import tempfile +import time import unittest +import zipfile from tempfile import TemporaryFile @@ -2392,5 +2394,113 @@ class CommandLineTest(unittest.TestCase): with open(path, 'rb') as f: self.assertEqual(f.read(), zf.read(zi)) + +# Poor man's technique to consume a (smallish) iterable. +consume = tuple + + +def add_dirs(zipfile): + """ + Given a writable zipfile, inject directory entries for + any directories implied by the presence of children. + """ + names = zipfile.namelist() + consume( + zipfile.writestr(name + "/", b"") + for name in map(posixpath.dirname, names) + if name and name + "/" not in names + ) + return zipfile + + +def build_abcde_files(): + """ + Create a zip file with this structure: + + . + ├── a.txt + └── b + ├── c.txt + └── d + └── e.txt + """ + data = io.BytesIO() + zf = zipfile.ZipFile(data, "w") + zf.writestr("a.txt", b"content of a") + zf.writestr("b/c.txt", b"content of c") + zf.writestr("b/d/e.txt", b"content of e") + zf.filename = "abcde.zip" + return zf + + +class TestPath(unittest.TestCase): + def setUp(self): + self.fixtures = contextlib.ExitStack() + self.addCleanup(self.fixtures.close) + + def zipfile_abcde(self): + with self.subTest(): + yield build_abcde_files() + with self.subTest(): + yield add_dirs(build_abcde_files()) + + def zipfile_ondisk(self): + tmpdir = pathlib.Path(self.fixtures.enter_context(temp_dir())) + for zipfile_abcde in self.zipfile_abcde(): + buffer = zipfile_abcde.fp + zipfile_abcde.close() + path = tmpdir / zipfile_abcde.filename + with path.open("wb") as strm: + strm.write(buffer.getvalue()) + yield path + + def test_iterdir_istype(self): + for zipfile_abcde in self.zipfile_abcde(): + root = zipfile.Path(zipfile_abcde) + assert root.is_dir() + a, b = root.iterdir() + assert a.is_file() + assert b.is_dir() + c, d = b.iterdir() + assert c.is_file() + e, = d.iterdir() + assert e.is_file() + + def test_open(self): + for zipfile_abcde in self.zipfile_abcde(): + root = zipfile.Path(zipfile_abcde) + a, b = root.iterdir() + with a.open() as strm: + data = strm.read() + assert data == b"content of a" + + def test_read(self): + for zipfile_abcde in self.zipfile_abcde(): + root = zipfile.Path(zipfile_abcde) + a, b = root.iterdir() + assert a.read_text() == "content of a" + assert a.read_bytes() == b"content of a" + + def test_traverse_truediv(self): + for zipfile_abcde in self.zipfile_abcde(): + root = zipfile.Path(zipfile_abcde) + a = root / "a" + assert a.is_file() + e = root / "b" / "d" / "e.txt" + assert e.read_text() == "content of e" + + def test_pathlike_construction(self): + """ + zipfile.Path should be constructable from a path-like object + """ + for zipfile_ondisk in self.zipfile_ondisk(): + pathlike = pathlib.Path(str(zipfile_ondisk)) + zipfile.Path(pathlike) + + def test_traverse_pathlike(self): + for zipfile_abcde in self.zipfile_abcde(): + root = zipfile.Path(zipfile_abcde) + root / pathlib.Path("a") + if __name__ == "__main__": unittest.main() diff --git a/Lib/zipfile.py b/Lib/zipfile.py index 2dc01647211..62475c701f5 100644 --- a/Lib/zipfile.py +++ b/Lib/zipfile.py @@ -3,16 +3,18 @@ Read and write ZIP files. XXX references to utf-8 need further investigation. """ +import binascii +import functools +import importlib.util import io import os -import importlib.util -import sys -import time -import stat +import posixpath import shutil +import stat import struct -import binascii +import sys import threading +import time try: import zlib # We may need its compression method @@ -2102,6 +2104,138 @@ class PyZipFile(ZipFile): return (fname, archivename) +class Path: + """ + A pathlib-compatible interface for zip files. + + Consider a zip file with this structure:: + + . + ├── a.txt + └── b + ├── c.txt + └── d + └── e.txt + + >>> data = io.BytesIO() + >>> zf = ZipFile(data, 'w') + >>> zf.writestr('a.txt', 'content of a') + >>> zf.writestr('b/c.txt', 'content of c') + >>> zf.writestr('b/d/e.txt', 'content of e') + >>> zf.filename = 'abcde.zip' + + Path accepts the zipfile object itself or a filename + + >>> root = Path(zf) + + From there, several path operations are available. + + Directory iteration (including the zip file itself): + + >>> a, b = root.iterdir() + >>> a + Path('abcde.zip', 'a.txt') + >>> b + Path('abcde.zip', 'b/') + + name property: + + >>> b.name + 'b' + + join with divide operator: + + >>> c = b / 'c.txt' + >>> c + Path('abcde.zip', 'b/c.txt') + >>> c.name + 'c.txt' + + Read text: + + >>> c.read_text() + 'content of c' + + existence: + + >>> c.exists() + True + >>> (b / 'missing.txt').exists() + False + + Coersion to string: + + >>> str(c) + 'abcde.zip/b/c.txt' + """ + + __repr = "{self.__class__.__name__}({self.root.filename!r}, {self.at!r})" + + def __init__(self, root, at=""): + self.root = root if isinstance(root, ZipFile) else ZipFile(root) + self.at = at + + @property + def open(self): + return functools.partial(self.root.open, self.at) + + @property + def name(self): + return posixpath.basename(self.at.rstrip("/")) + + def read_text(self, *args, **kwargs): + with self.open() as strm: + return io.TextIOWrapper(strm, *args, **kwargs).read() + + def read_bytes(self): + with self.open() as strm: + return strm.read() + + def _is_child(self, path): + return posixpath.dirname(path.at.rstrip("/")) == self.at.rstrip("/") + + def _next(self, at): + return Path(self.root, at) + + def is_dir(self): + return not self.at or self.at.endswith("/") + + def is_file(self): + return not self.is_dir() + + def exists(self): + return self.at in self._names() + + def iterdir(self): + if not self.is_dir(): + raise ValueError("Can't listdir a file") + subs = map(self._next, self._names()) + return filter(self._is_child, subs) + + def __str__(self): + return posixpath.join(self.root.filename, self.at) + + def __repr__(self): + return self.__repr.format(self=self) + + def __truediv__(self, add): + next = posixpath.join(self.at, add) + next_dir = posixpath.join(self.at, add, "") + names = self._names() + return self._next(next_dir if next not in names and next_dir in names else next) + + @staticmethod + def _add_implied_dirs(names): + return names + [ + name + "/" + for name in map(posixpath.dirname, names) + if name and name + "/" not in names + ] + + def _names(self): + return self._add_implied_dirs(self.root.namelist()) + + def main(args=None): import argparse diff --git a/Misc/NEWS.d/next/Library/2019-05-07-15-00-45.bpo-36832.TExgqb.rst b/Misc/NEWS.d/next/Library/2019-05-07-15-00-45.bpo-36832.TExgqb.rst new file mode 100644 index 00000000000..23577d9b5a8 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-05-07-15-00-45.bpo-36832.TExgqb.rst @@ -0,0 +1 @@ +Introducing ``zipfile.Path``, a pathlib-compatible wrapper for traversing zip files.