bpo-39667: Sync zipp 3.0 (GH-18540)
* bpo-39667: Improve pathlib.Path compatibility on zipfile.Path and correct performance degradation as found in zipp 3.0
* 📜🤖 Added by blurb_it.
* Update docs for new zipfile.Path.open
* Rely on dict, faster than OrderedDict.
* Syntax edits on docs

Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com>
This commit is contained in:
parent
1f0cd3c61a
commit
0aeab5c438
|
@ -489,10 +489,20 @@ Path objects are traversable using the ``/`` operator.
|
|||
|
||||
The final path component.
|
||||
|
||||
.. method:: Path.open(*, **)
|
||||
.. method:: Path.open(mode='r', *, pwd, **)
|
||||
|
||||
Invoke :meth:`ZipFile.open` on the current path. Accepts
|
||||
the same arguments as :meth:`ZipFile.open`.
|
||||
Invoke :meth:`ZipFile.open` on the current path.
|
||||
Allows opening for read or write, text or binary
|
||||
through supported modes: 'r', 'w', 'rb', 'wb'.
|
||||
Positional and keyword arguments are passed through to
|
||||
:class:`io.TextIOWrapper` when opened as text and
|
||||
rejected with a :exc:`ValueError` otherwise.
|
||||
``pwd`` is the ``pwd`` parameter to
|
||||
:meth:`ZipFile.open`.
|
||||
|
||||
.. versionchanged:: 3.9
|
||||
Added support for text and binary modes for open. Default
|
||||
mode is now text.
|
||||
|
||||
.. method:: Path.iterdir()
|
||||
|
||||
|
|
|
@ -5,6 +5,7 @@ import itertools
|
|||
import os
|
||||
import pathlib
|
||||
import posixpath
|
||||
import string
|
||||
import struct
|
||||
import subprocess
|
||||
import sys
|
||||
|
@ -2880,7 +2881,7 @@ class TestPath(unittest.TestCase):
|
|||
a, b, g = root.iterdir()
|
||||
with a.open() as strm:
|
||||
data = strm.read()
|
||||
assert data == b"content of a"
|
||||
assert data == "content of a"
|
||||
|
||||
def test_read(self):
|
||||
for alpharep in self.zipfile_alpharep():
|
||||
|
@ -2974,6 +2975,11 @@ class TestPath(unittest.TestCase):
|
|||
# Check the file iterated all items
|
||||
assert entries.count == self.HUGE_ZIPFILE_NUM_ENTRIES
|
||||
|
||||
# @func_timeout.func_set_timeout(3)
|
||||
def test_implied_dirs_performance(self):
    """
    Feed ``CompleteDirs._implied_dirs`` 10000 deeply nested names.

    Every name is the lowercase alphabet followed by the digits of
    its index, with each character forming one path component.
    """
    letters = string.ascii_lowercase
    names = []
    for index in range(10000):
        names.append('/'.join(letters + str(index)))
    zipfile.CompleteDirs._implied_dirs(names)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
|
|
@ -17,7 +17,6 @@ import sys
|
|||
import threading
|
||||
import time
|
||||
import contextlib
|
||||
from collections import OrderedDict
|
||||
|
||||
try:
|
||||
import zlib # We may need its compression method
|
||||
|
@ -2102,24 +2101,6 @@ class PyZipFile(ZipFile):
|
|||
return (fname, archivename)
|
||||
|
||||
|
||||
def _unique_everseen(iterable, key=None):
|
||||
"List unique elements, preserving order. Remember all elements ever seen."
|
||||
# unique_everseen('AAAABBBCCDAABBB') --> A B C D
|
||||
# unique_everseen('ABBCcAD', str.lower) --> A B C D
|
||||
seen = set()
|
||||
seen_add = seen.add
|
||||
if key is None:
|
||||
for element in itertools.filterfalse(seen.__contains__, iterable):
|
||||
seen_add(element)
|
||||
yield element
|
||||
else:
|
||||
for element in iterable:
|
||||
k = key(element)
|
||||
if k not in seen:
|
||||
seen_add(k)
|
||||
yield element
|
||||
|
||||
|
||||
def _parents(path):
|
||||
"""
|
||||
Given a path with elements separated by
|
||||
|
@ -2161,6 +2142,18 @@ def _ancestry(path):
|
|||
path, tail = posixpath.split(path)
|
||||
|
||||
|
||||
_dedupe = dict.fromkeys
|
||||
"""Deduplicate an iterable in original order"""
|
||||
|
||||
|
||||
def _difference(minuend, subtrahend):
|
||||
"""
|
||||
Return items in minuend not in subtrahend, retaining order
|
||||
with O(1) lookup.
|
||||
"""
|
||||
return itertools.filterfalse(set(subtrahend).__contains__, minuend)
|
||||
|
||||
|
||||
class CompleteDirs(ZipFile):
|
||||
"""
|
||||
A ZipFile subclass that ensures that implied directories
|
||||
|
@ -2170,13 +2163,8 @@ class CompleteDirs(ZipFile):
|
|||
@staticmethod
|
||||
def _implied_dirs(names):
|
||||
parents = itertools.chain.from_iterable(map(_parents, names))
|
||||
# Deduplicate entries in original order
|
||||
implied_dirs = OrderedDict.fromkeys(
|
||||
p + posixpath.sep for p in parents
|
||||
# Cast names to a set for O(1) lookups
|
||||
if p + posixpath.sep not in set(names)
|
||||
)
|
||||
return implied_dirs
|
||||
as_dirs = (p + posixpath.sep for p in parents)
|
||||
return _dedupe(_difference(as_dirs, names))
|
||||
|
||||
def namelist(self):
|
||||
names = super(CompleteDirs, self).namelist()
|
||||
|
@ -2305,20 +2293,31 @@ class Path:
|
|||
self.root = FastLookup.make(root)
|
||||
self.at = at
|
||||
|
||||
@property
|
||||
def open(self):
|
||||
return functools.partial(self.root.open, self.at)
|
||||
def open(self, mode='r', *args, pwd=None, **kwargs):
    """
    Open this entry as text or binary following the semantics
    of ``pathlib.Path.open()`` by passing arguments through
    to io.TextIOWrapper().

    ``mode`` selects read or write through its first character and
    binary access through the presence of ``'b'``. ``pwd`` is
    forwarded as the ``pwd`` keyword of the archive's ``open()``.
    Any other positional or keyword arguments are handed to
    ``io.TextIOWrapper`` in text mode.

    Raises ValueError when such extra arguments are combined with
    a binary mode.
    """
    binary = 'b' in mode
    # Validate before touching the archive so that a rejected call
    # does not leave an already-opened stream behind.
    if binary and (args or kwargs):
        raise ValueError("encoding args invalid for binary operation")
    # Only the leading character of the mode is passed to the archive.
    zip_mode = mode[0]
    stream = self.root.open(self.at, zip_mode, pwd=pwd)
    if binary:
        return stream
    return io.TextIOWrapper(stream, *args, **kwargs)
|
||||
|
||||
@property
def name(self):
    """The final component of this path, disregarding trailing slashes."""
    trimmed = self.at.rstrip("/")
    return posixpath.basename(trimmed)
|
||||
|
||||
def read_text(self, *args, **kwargs):
|
||||
with self.open() as strm:
|
||||
return io.TextIOWrapper(strm, *args, **kwargs).read()
|
||||
with self.open('r', *args, **kwargs) as strm:
|
||||
return strm.read()
|
||||
|
||||
def read_bytes(self):
|
||||
with self.open() as strm:
|
||||
with self.open('rb') as strm:
|
||||
return strm.read()
|
||||
|
||||
def _is_child(self, path):
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Improve pathlib.Path compatibility on zipfile.Path and correct a performance degradation, syncing the changes from zipp 3.0.
|
Loading…
Reference in New Issue