gh-106752: Sync with zipp 3.16.2 (#106757)

* gh-106752: Sync with zipp 3.16.2

* Add blurb
This commit is contained in:
Jason R. Coombs 2023-07-15 09:21:17 -04:00 committed by GitHub
parent 2566b74b26
commit 22980dc7c9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 204 additions and 27 deletions

View File

@ -1,5 +1,9 @@
import unittest
import io
import itertools
import math
import re
import string
import unittest
import zipfile
from ._functools import compose
@ -9,9 +13,11 @@ from ._support import import_or_skip
big_o = import_or_skip('big_o')
pytest = import_or_skip('pytest')
class TestComplexity(unittest.TestCase):
@pytest.mark.flaky
def test_implied_dirs_performance(self):
best, others = big_o.big_o(
compose(consume, zipfile.CompleteDirs._implied_dirs),
@ -22,3 +28,76 @@ class TestComplexity(unittest.TestCase):
min_n=1,
)
assert best <= big_o.complexities.Linear
def make_zip_path(self, depth=1, width=1) -> zipfile.Path:
    """
    Build an in-memory zip archive and wrap it in a Path.

    Each directory prefix from ``make_deep_paths(depth)`` is combined
    with each name from ``make_names(width)``; every combination is
    written as an empty ``.txt`` member.
    """
    archive = zipfile.ZipFile(io.BytesIO(), mode='w')
    # Exhaust both sources before writing anything, outer loop over the
    # prefixes, so the member order is prefix-major.
    prefixes = tuple(self.make_deep_paths(depth))
    stems = tuple(self.make_names(width))
    for prefix in prefixes:
        for stem in stems:
            archive.writestr(f"{prefix}{stem}.txt", b'')
    archive.filename = "big un.zip"
    return zipfile.Path(archive)
@classmethod
def make_names(cls, width, letters=string.ascii_lowercase):
    """
    Lazily produce ``width`` distinct names built from ``letters``.

    All names have the same length: enough letters (never fewer than
    one) for the combinations to supply ``width`` names.

    >>> list(TestComplexity.make_names(1))
    ['a']
    >>> list(TestComplexity.make_names(2))
    ['a', 'b']
    >>> list(TestComplexity.make_names(30))
    ['aa', 'ab', ..., 'bd']
    """
    # determine how many products are needed to produce width;
    # log(1) is 0, which would yield a single empty name, so always
    # use at least one letter.
    n_products = max(1, math.ceil(math.log(width, len(letters))))
    inputs = (letters,) * n_products
    combinations = itertools.product(*inputs)
    names = map(''.join, combinations)
    return itertools.islice(names, width)
@classmethod
def make_deep_paths(cls, depth):
    """
    Lazily produce one directory prefix per nesting level, for levels
    ``0`` through ``depth - 1``.
    """
    build = cls.make_deep_path
    return (build(level) for level in range(depth))
@classmethod
def make_deep_path(cls, depth):
    """Return the prefix ``'d/'`` repeated ``depth`` times."""
    return 'd/' * depth
def test_baseline_regex_complexity(self):
    """
    Baseline: matching the regex for ``*.txt`` against ever deeper
    directory prefixes should take constant time.
    """
    best, others = big_o.big_o(
        # In a raw string a single backslash already escapes the dot;
        # doubling it would instead require a literal backslash.
        lambda path: re.fullmatch(r'[^/]*\.txt', path),
        self.make_deep_path,
        max_n=100,
        min_n=1,
    )
    assert best <= big_o.complexities.Constant
@pytest.mark.flaky
def test_glob_depth(self):
    """
    Globbing ``*.txt`` at the root should be at worst quadratic as the
    archive from ``make_zip_path`` grows.
    """
    def run_glob(path):
        return consume(path.glob('*.txt'))

    best, _others = big_o.big_o(
        run_glob,
        self.make_zip_path,
        min_n=1,
        max_n=100,
    )
    assert best <= big_o.complexities.Quadratic
@pytest.mark.flaky
def test_glob_width(self):
    """
    Globbing ``*.txt`` at the root should be at worst linear in the
    ``width`` of the archive.
    """
    def run_glob(path):
        return consume(path.glob('*.txt'))

    def wide_zip(size):
        return self.make_zip_path(width=size)

    best, _others = big_o.big_o(
        run_glob,
        wide_zip,
        min_n=1,
        max_n=100,
    )
    assert best <= big_o.complexities.Linear
@pytest.mark.flaky
def test_glob_width_and_depth(self):
    """
    Globbing ``*.txt`` at the root should be at worst linear when
    ``depth`` and ``width`` grow together.
    """
    def run_glob(path):
        return consume(path.glob('*.txt'))

    def square_zip(size):
        return self.make_zip_path(depth=size, width=size)

    best, _others = big_o.big_o(
        run_glob,
        square_zip,
        min_n=1,
        max_n=10,
    )
    assert best <= big_o.complexities.Linear

View File

@ -41,9 +41,13 @@ def build_alpharep_fixture():
d
e.txt
f.txt
g
h
i.txt
g
h
i.txt
j
k.bin
l.baz
m.bar
This fixture has the following key characteristics:
@ -51,6 +55,7 @@ def build_alpharep_fixture():
- a file two levels deep (b/d/e)
- multiple files in a directory (b/c, b/f)
- a directory containing only a directory (g/h)
- a directory with files of different extensions (j/klm)
"alpha" because it uses alphabet
"rep" because it's a representative example
@ -62,6 +67,9 @@ def build_alpharep_fixture():
zf.writestr("b/d/e.txt", b"content of e")
zf.writestr("b/f.txt", b"content of f")
zf.writestr("g/h/i.txt", b"content of i")
zf.writestr("j/k.bin", b"content of k")
zf.writestr("j/l.baz", b"content of l")
zf.writestr("j/m.bar", b"content of m")
zf.filename = "alpharep.zip"
return zf
@ -92,7 +100,7 @@ class TestPath(unittest.TestCase):
def test_iterdir_and_types(self, alpharep):
root = zipfile.Path(alpharep)
assert root.is_dir()
a, b, g = root.iterdir()
a, b, g, j = root.iterdir()
assert a.is_file()
assert b.is_dir()
assert g.is_dir()
@ -112,7 +120,7 @@ class TestPath(unittest.TestCase):
@pass_alpharep
def test_iterdir_on_file(self, alpharep):
root = zipfile.Path(alpharep)
a, b, g = root.iterdir()
a, b, g, j = root.iterdir()
with self.assertRaises(ValueError):
a.iterdir()
@ -127,7 +135,7 @@ class TestPath(unittest.TestCase):
@pass_alpharep
def test_open(self, alpharep):
root = zipfile.Path(alpharep)
a, b, g = root.iterdir()
a, b, g, j = root.iterdir()
with a.open(encoding="utf-8") as strm:
data = strm.read()
self.assertEqual(data, "content of a")
@ -229,7 +237,7 @@ class TestPath(unittest.TestCase):
@pass_alpharep
def test_read(self, alpharep):
root = zipfile.Path(alpharep)
a, b, g = root.iterdir()
a, b, g, j = root.iterdir()
assert a.read_text(encoding="utf-8") == "content of a"
# Also check positional encoding arg (gh-101144).
assert a.read_text("utf-8") == "content of a"
@ -295,7 +303,7 @@ class TestPath(unittest.TestCase):
reflect that change.
"""
root = zipfile.Path(alpharep)
a, b, g = root.iterdir()
a, b, g, j = root.iterdir()
alpharep.writestr('foo.txt', 'foo')
alpharep.writestr('bar/baz.txt', 'baz')
assert any(child.name == 'foo.txt' for child in root.iterdir())
@ -394,6 +402,13 @@ class TestPath(unittest.TestCase):
e = root / '.hgrc'
assert e.suffixes == []
@pass_alpharep
def test_suffix_no_filename(self, alpharep):
    """
    A non-root path reports an empty suffix and no suffixes even when
    the archive has no filename.
    """
    alpharep.filename = None
    root = zipfile.Path(alpharep)
    suffix = root.joinpath('example').suffix
    suffixes = root.joinpath('example').suffixes
    assert suffix == ""
    assert suffixes == []
@pass_alpharep
def test_stem(self, alpharep):
"""
@ -411,6 +426,8 @@ class TestPath(unittest.TestCase):
d = root / "d"
assert d.stem == "d"
assert (root / ".gitignore").stem == ".gitignore"
@pass_alpharep
def test_root_parent(self, alpharep):
root = zipfile.Path(alpharep)
@ -442,12 +459,49 @@ class TestPath(unittest.TestCase):
assert not root.match("*.txt")
assert list(root.glob("b/c.*")) == [zipfile.Path(alpharep, "b/c.txt")]
assert list(root.glob("b/*.txt")) == [
zipfile.Path(alpharep, "b/c.txt"),
zipfile.Path(alpharep, "b/f.txt"),
]
@pass_alpharep
def test_glob_recursive(self, alpharep):
    """
    Every result of a ``**/*.txt`` glob matches ``*.txt``, and ``rglob``
    yields the same results as the explicit ``**/`` form.
    """
    root = zipfile.Path(alpharep)
    recursive_pattern = "**/*.txt"
    for found in root.glob(recursive_pattern):
        assert found.match("*.txt")

    via_glob = list(root.glob(recursive_pattern))
    via_rglob = list(root.rglob("*.txt"))
    assert via_glob == via_rglob
@pass_alpharep
def test_glob_subdirs(self, alpharep):
    """
    ``*/i.txt`` looks exactly one directory deep and finds nothing,
    while the recursive form finds ``g/h/i.txt``.
    """
    root = zipfile.Path(alpharep)
    one_level = list(root.glob("*/i.txt"))
    assert one_level == []

    any_level = list(root.rglob("*/i.txt"))
    assert any_level == [zipfile.Path(alpharep, "g/h/i.txt")]
@pass_alpharep
def test_glob_does_not_overmatch_dot(self, alpharep):
    """The literal ``.`` in ``*.xt`` must not match arbitrary characters."""
    root = zipfile.Path(alpharep)
    found = list(root.glob("*.xt"))
    assert found == []
@pass_alpharep
def test_glob_single_char(self, alpharep):
    """
    ``?`` and ``[.]`` each match the dot in ``a.txt``; ``[?]`` matches
    only a literal question mark and so finds nothing.
    """
    root = zipfile.Path(alpharep)
    for pattern in ("a?txt", "a[.]txt"):
        assert list(root.glob(pattern)) == [zipfile.Path(alpharep, "a.txt")]
    assert list(root.glob("a[?]txt")) == []
@pass_alpharep
def test_glob_chars(self, alpharep):
    """
    Character sets select ``k.bin`` and ``l.baz`` under ``j/`` but not
    ``m.bar``.
    """
    root = zipfile.Path(alpharep)
    expected = [zipfile.Path(alpharep, f"j/{name}") for name in ("k.bin", "l.baz")]
    found = list(root.glob("j/?.b[ai][nz]"))
    assert found == expected
def test_glob_empty(self):
root = zipfile.Path(zipfile.ZipFile(io.BytesIO(), 'w'))
with self.assertRaises(ValueError):

View File

@ -0,0 +1,4 @@
from . import test_path
# When run as a script, build the alpharep fixture and extract it into
# the ``alpharep`` directory.
if __name__ == '__main__':
    test_path.build_alpharep_fixture().extractall('alpharep')

View File

@ -5,7 +5,8 @@ import itertools
import contextlib
import pathlib
import re
import fnmatch
from .glob import translate
__all__ = ['Path']
@ -296,21 +297,24 @@ class Path:
encoding, args, kwargs = _extract_text_encoding(*args, **kwargs)
return io.TextIOWrapper(stream, encoding, *args, **kwargs)
def _base(self):
return pathlib.PurePosixPath(self.at or self.root.filename)
@property
def name(self):
return pathlib.Path(self.at).name or self.filename.name
return self._base().name
@property
def suffix(self):
return pathlib.Path(self.at).suffix or self.filename.suffix
return self._base().suffix
@property
def suffixes(self):
return pathlib.Path(self.at).suffixes or self.filename.suffixes
return self._base().suffixes
@property
def stem(self):
return pathlib.Path(self.at).stem or self.filename.stem
return self._base().stem
@property
def filename(self):
@ -347,7 +351,7 @@ class Path:
return filter(self._is_child, subs)
def match(self, path_pattern):
return pathlib.Path(self.at).match(path_pattern)
return pathlib.PurePosixPath(self.at).match(path_pattern)
def is_symlink(self):
"""
@ -355,22 +359,13 @@ class Path:
"""
return False
def _descendants(self):
for child in self.iterdir():
yield child
if child.is_dir():
yield from child._descendants()
def glob(self, pattern):
if not pattern:
raise ValueError(f"Unacceptable pattern: {pattern!r}")
matches = re.compile(fnmatch.translate(pattern)).fullmatch
return (
child
for child in self._descendants()
if matches(str(child.relative_to(self)))
)
prefix = re.escape(self.at)
matches = re.compile(prefix + translate(pattern)).fullmatch
return map(self._next, filter(matches, self.root.namelist()))
def rglob(self, pattern):
    """Glob with ``pattern`` prefixed by ``**/``."""
    recursive_pattern = '**/{}'.format(pattern)
    return self.glob(recursive_pattern)

40
Lib/zipfile/_path/glob.py Normal file
View File

@ -0,0 +1,40 @@
import re
def translate(pattern):
    r"""
    Given a glob pattern, produce a regex that matches it.

    ``*`` and ``?`` never match a ``/``; ``**`` may. Bracketed
    character sets are passed through to the regex unchanged.

    >>> translate('*.txt')
    '[^/]*\\.txt'
    >>> translate('a?txt')
    'a[^/]txt'
    >>> translate('**/*')
    '.*/[^/]*'
    """
    return ''.join(map(replace, separate(pattern)))


def separate(pattern):
    """
    Separate out character sets to avoid translating their contents.

    Returns an iterator of match objects. Each match is one of: a run
    of text containing no ``[``; a complete ``[...]`` set (captured in
    the group named ``set``); or an unterminated ``[`` through the end
    of the pattern.

    >>> [m.group(0) for m in separate('*.txt')]
    ['*.txt']
    >>> [m.group(0) for m in separate('a[?]txt')]
    ['a', '[?]', 'txt']
    """
    return re.finditer(r'([^\[]+)|(?P<set>[\[].*?[\]])|([\[][^\]]*$)', pattern)


def replace(match):
    """
    Perform the replacements for a match from :func:`separate`.

    A character set is returned verbatim. Any other text is escaped
    and then has its (escaped) wildcards swapped for regex equivalents.
    """
    return match.group('set') or (
        re.escape(match.group(0))
        # '**' must be handled before '*' so it is not consumed piecemeal.
        .replace('\\*\\*', r'.*')
        .replace('\\*', r'[^/]*')
        # A single-character wildcard must not match the path separator.
        .replace('\\?', r'[^/]')
    )

View File

@ -0,0 +1,5 @@
Fixed several bugs in zipfile.Path, including: in ``Path.match``, Windows
separators are no longer honored (and never were meant to be); fixed
``name``/``suffix``/``suffixes``/``stem`` operations when no filename is
present and the Path is not at the root of the zipfile; reworked glob for
performance and more correct matching behavior.