From 22980dc7c9dcec4b74fea815542601ef582c230e Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sat, 15 Jul 2023 09:21:17 -0400 Subject: [PATCH] gh-106752: Sync with zipp 3.16.2 (#106757) * gh-106752: Sync with zipp 3.16.2 * Add blurb --- .../test_zipfile/_path/test_complexity.py | 81 ++++++++++++++++++- Lib/test/test_zipfile/_path/test_path.py | 70 ++++++++++++++-- Lib/test/test_zipfile/_path/write-alpharep.py | 4 + Lib/zipfile/_path/__init__.py | 31 +++---- Lib/zipfile/_path/glob.py | 40 +++++++++ ...-07-14-16-54-13.gh-issue-106752.BT1Yxw.rst | 5 ++ 6 files changed, 204 insertions(+), 27 deletions(-) create mode 100644 Lib/test/test_zipfile/_path/write-alpharep.py create mode 100644 Lib/zipfile/_path/glob.py create mode 100644 Misc/NEWS.d/next/Library/2023-07-14-16-54-13.gh-issue-106752.BT1Yxw.rst diff --git a/Lib/test/test_zipfile/_path/test_complexity.py b/Lib/test/test_zipfile/_path/test_complexity.py index 3432dc39e56..7050937738a 100644 --- a/Lib/test/test_zipfile/_path/test_complexity.py +++ b/Lib/test/test_zipfile/_path/test_complexity.py @@ -1,5 +1,9 @@ -import unittest +import io +import itertools +import math +import re import string +import unittest import zipfile from ._functools import compose @@ -9,9 +13,11 @@ from ._support import import_or_skip big_o = import_or_skip('big_o') +pytest = import_or_skip('pytest') class TestComplexity(unittest.TestCase): + @pytest.mark.flaky def test_implied_dirs_performance(self): best, others = big_o.big_o( compose(consume, zipfile.CompleteDirs._implied_dirs), @@ -22,3 +28,76 @@ class TestComplexity(unittest.TestCase): min_n=1, ) assert best <= big_o.complexities.Linear + + def make_zip_path(self, depth=1, width=1) -> zipfile.Path: + """ + Construct a Path with width files at every level of depth. + """ + zf = zipfile.ZipFile(io.BytesIO(), mode='w') + pairs = itertools.product(self.make_deep_paths(depth), self.make_names(width)) + for path, name in pairs: + zf.writestr(f"{path}{name}.txt", b'') + zf.filename = "big un.zip" + return zipfile.Path(zf) + + @classmethod + def make_names(cls, width, letters=string.ascii_lowercase): + """ + >>> list(TestComplexity.make_names(2)) + ['a', 'b'] + >>> list(TestComplexity.make_names(30)) + ['aa', 'ab', ..., 'bd'] + """ + # determine how many products are needed to produce width + n_products = math.ceil(math.log(width, len(letters))) + inputs = (letters,) * n_products + combinations = itertools.product(*inputs) + names = map(''.join, combinations) + return itertools.islice(names, width) + + @classmethod + def make_deep_paths(cls, depth): + return map(cls.make_deep_path, range(depth)) + + @classmethod + def make_deep_path(cls, depth): + return ''.join(('d/',) * depth) + + def test_baseline_regex_complexity(self): + best, others = big_o.big_o( + lambda path: re.fullmatch(r'[^/]*\\.txt', path), + self.make_deep_path, + max_n=100, + min_n=1, + ) + assert best <= big_o.complexities.Constant + + @pytest.mark.flaky + def test_glob_depth(self): + best, others = big_o.big_o( + lambda path: consume(path.glob('*.txt')), + self.make_zip_path, + max_n=100, + min_n=1, + ) + assert best <= big_o.complexities.Quadratic + + @pytest.mark.flaky + def test_glob_width(self): + best, others = big_o.big_o( + lambda path: consume(path.glob('*.txt')), + lambda size: self.make_zip_path(width=size), + max_n=100, + min_n=1, + ) + assert best <= big_o.complexities.Linear + + @pytest.mark.flaky + def test_glob_width_and_depth(self): + best, others = big_o.big_o( + lambda path: consume(path.glob('*.txt')), + lambda size: self.make_zip_path(depth=size, width=size), + max_n=10, + min_n=1, + ) + assert best <= big_o.complexities.Linear diff --git a/Lib/test/test_zipfile/_path/test_path.py b/Lib/test/test_zipfile/_path/test_path.py index aff91e53995..c66cb3cba69 100644 --- a/Lib/test/test_zipfile/_path/test_path.py +++ b/Lib/test/test_zipfile/_path/test_path.py @@ -41,9 +41,13 @@ def build_alpharep_fixture(): │ ├── d │ │ └── e.txt │ └── f.txt - └── g - └── h - └── i.txt + ├── g + │ └── h + │ └── i.txt + └── j + ├── k.bin + ├── l.baz + └── m.bar This fixture has the following key characteristics: @@ -51,6 +55,7 @@ def build_alpharep_fixture(): - a file two levels deep (b/d/e) - multiple files in a directory (b/c, b/f) - a directory containing only a directory (g/h) + - a directory with files of different extensions (j/klm) "alpha" because it uses alphabet "rep" because it's a representative example @@ -62,6 +67,9 @@ def build_alpharep_fixture(): zf.writestr("b/d/e.txt", b"content of e") zf.writestr("b/f.txt", b"content of f") zf.writestr("g/h/i.txt", b"content of i") + zf.writestr("j/k.bin", b"content of k") + zf.writestr("j/l.baz", b"content of l") + zf.writestr("j/m.bar", b"content of m") zf.filename = "alpharep.zip" return zf @@ -92,7 +100,7 @@ class TestPath(unittest.TestCase): def test_iterdir_and_types(self, alpharep): root = zipfile.Path(alpharep) assert root.is_dir() - a, b, g = root.iterdir() + a, b, g, j = root.iterdir() assert a.is_file() assert b.is_dir() assert g.is_dir() @@ -112,7 +120,7 @@ class TestPath(unittest.TestCase): @pass_alpharep def test_iterdir_on_file(self, alpharep): root = zipfile.Path(alpharep) - a, b, g = root.iterdir() + a, b, g, j = root.iterdir() with self.assertRaises(ValueError): a.iterdir() @@ -127,7 +135,7 @@ class TestPath(unittest.TestCase): @pass_alpharep def test_open(self, alpharep): root = zipfile.Path(alpharep) - a, b, g = root.iterdir() + a, b, g, j = root.iterdir() with a.open(encoding="utf-8") as strm: data = strm.read() self.assertEqual(data, "content of a") @@ -229,7 +237,7 @@ class TestPath(unittest.TestCase): @pass_alpharep def test_read(self, alpharep): root = zipfile.Path(alpharep) - a, b, g = root.iterdir() + a, b, g, j = root.iterdir() assert a.read_text(encoding="utf-8") == "content of a" # Also check positional encoding arg (gh-101144). assert a.read_text("utf-8") == "content of a" @@ -295,7 +303,7 @@ class TestPath(unittest.TestCase): reflect that change. """ root = zipfile.Path(alpharep) - a, b, g = root.iterdir() + a, b, g, j = root.iterdir() alpharep.writestr('foo.txt', 'foo') alpharep.writestr('bar/baz.txt', 'baz') assert any(child.name == 'foo.txt' for child in root.iterdir()) @@ -394,6 +402,13 @@ class TestPath(unittest.TestCase): e = root / '.hgrc' assert e.suffixes == [] + @pass_alpharep + def test_suffix_no_filename(self, alpharep): + alpharep.filename = None + root = zipfile.Path(alpharep) + assert root.joinpath('example').suffix == "" + assert root.joinpath('example').suffixes == [] + @pass_alpharep def test_stem(self, alpharep): """ @@ -411,6 +426,8 @@ class TestPath(unittest.TestCase): d = root / "d" assert d.stem == "d" + assert (root / ".gitignore").stem == ".gitignore" + @pass_alpharep def test_root_parent(self, alpharep): root = zipfile.Path(alpharep) @@ -442,12 +459,49 @@ class TestPath(unittest.TestCase): assert not root.match("*.txt") assert list(root.glob("b/c.*")) == [zipfile.Path(alpharep, "b/c.txt")] + assert list(root.glob("b/*.txt")) == [ + zipfile.Path(alpharep, "b/c.txt"), + zipfile.Path(alpharep, "b/f.txt"), + ] + @pass_alpharep + def test_glob_recursive(self, alpharep): + root = zipfile.Path(alpharep) files = root.glob("**/*.txt") assert all(each.match("*.txt") for each in files) assert list(root.glob("**/*.txt")) == list(root.rglob("*.txt")) + @pass_alpharep + def test_glob_subdirs(self, alpharep): + root = zipfile.Path(alpharep) + + assert list(root.glob("*/i.txt")) == [] + assert list(root.rglob("*/i.txt")) == [zipfile.Path(alpharep, "g/h/i.txt")] + + @pass_alpharep + def test_glob_does_not_overmatch_dot(self, alpharep): + root = zipfile.Path(alpharep) + + assert list(root.glob("*.xt")) == [] + + @pass_alpharep + def test_glob_single_char(self, alpharep): + root = zipfile.Path(alpharep) + + assert list(root.glob("a?txt")) == [zipfile.Path(alpharep, "a.txt")] + assert list(root.glob("a[.]txt")) == [zipfile.Path(alpharep, "a.txt")] + assert list(root.glob("a[?]txt")) == [] + + @pass_alpharep + def test_glob_chars(self, alpharep): + root = zipfile.Path(alpharep) + + assert list(root.glob("j/?.b[ai][nz]")) == [ + zipfile.Path(alpharep, "j/k.bin"), + zipfile.Path(alpharep, "j/l.baz"), + ] + def test_glob_empty(self): root = zipfile.Path(zipfile.ZipFile(io.BytesIO(), 'w')) with self.assertRaises(ValueError): diff --git a/Lib/test/test_zipfile/_path/write-alpharep.py b/Lib/test/test_zipfile/_path/write-alpharep.py new file mode 100644 index 00000000000..48c09b53717 --- /dev/null +++ b/Lib/test/test_zipfile/_path/write-alpharep.py @@ -0,0 +1,4 @@ +from . import test_path + + +__name__ == '__main__' and test_path.build_alpharep_fixture().extractall('alpharep') diff --git a/Lib/zipfile/_path/__init__.py b/Lib/zipfile/_path/__init__.py index fd49a3ea91d..78c413563bb 100644 --- a/Lib/zipfile/_path/__init__.py +++ b/Lib/zipfile/_path/__init__.py @@ -5,7 +5,8 @@ import itertools import contextlib import pathlib import re -import fnmatch + +from .glob import translate __all__ = ['Path'] @@ -296,21 +297,24 @@ class Path: encoding, args, kwargs = _extract_text_encoding(*args, **kwargs) return io.TextIOWrapper(stream, encoding, *args, **kwargs) + def _base(self): + return pathlib.PurePosixPath(self.at or self.root.filename) + @property def name(self): - return pathlib.Path(self.at).name or self.filename.name + return self._base().name @property def suffix(self): - return pathlib.Path(self.at).suffix or self.filename.suffix + return self._base().suffix @property def suffixes(self): - return pathlib.Path(self.at).suffixes or self.filename.suffixes + return self._base().suffixes @property def stem(self): - return pathlib.Path(self.at).stem or self.filename.stem + return self._base().stem @property def filename(self): @@ -347,7 +351,7 @@ class Path: return filter(self._is_child, subs) def match(self, path_pattern): - return pathlib.Path(self.at).match(path_pattern) + return pathlib.PurePosixPath(self.at).match(path_pattern) def is_symlink(self): """ @@ -355,22 +359,13 @@ class Path: """ return False - def _descendants(self): - for child in self.iterdir(): - yield child - if child.is_dir(): - yield from child._descendants() - def glob(self, pattern): if not pattern: raise ValueError(f"Unacceptable pattern: {pattern!r}") - matches = re.compile(fnmatch.translate(pattern)).fullmatch - return ( - child - for child in self._descendants() - if matches(str(child.relative_to(self))) - ) + prefix = re.escape(self.at) + matches = re.compile(prefix + translate(pattern)).fullmatch + return map(self._next, filter(matches, self.root.namelist())) def rglob(self, pattern): return self.glob(f'**/{pattern}') diff --git a/Lib/zipfile/_path/glob.py b/Lib/zipfile/_path/glob.py new file mode 100644 index 00000000000..4a2e665e270 --- /dev/null +++ b/Lib/zipfile/_path/glob.py @@ -0,0 +1,40 @@ +import re + + +def translate(pattern): + r""" + Given a glob pattern, produce a regex that matches it. + + >>> translate('*.txt') + '[^/]*\\.txt' + >>> translate('a?txt') + 'a.txt' + >>> translate('**/*') + '.*/[^/]*' + """ + return ''.join(map(replace, separate(pattern))) + + +def separate(pattern): + """ + Separate out character sets to avoid translating their contents. + + >>> [m.group(0) for m in separate('*.txt')] + ['*.txt'] + >>> [m.group(0) for m in separate('a[?]txt')] + ['a', '[?]', 'txt'] + """ + return re.finditer(r'([^\[]+)|(?P[\[].*?[\]])|([\[][^\]]*$)', pattern) + + +def replace(match): + """ + Perform the replacements for a match from :func:`separate`. + """ + + return match.group('set') or ( + re.escape(match.group(0)) + .replace('\\*\\*', r'.*') + .replace('\\*', r'[^/]*') + .replace('\\?', r'.') + ) diff --git a/Misc/NEWS.d/next/Library/2023-07-14-16-54-13.gh-issue-106752.BT1Yxw.rst b/Misc/NEWS.d/next/Library/2023-07-14-16-54-13.gh-issue-106752.BT1Yxw.rst new file mode 100644 index 00000000000..bbc53d76dec --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-07-14-16-54-13.gh-issue-106752.BT1Yxw.rst @@ -0,0 +1,5 @@ +Fixed several bugs in zipfile.Path, including: in ``Path.match`, Windows +separators are no longer honored (and never were meant to be); Fixed +``name``/``suffix``/``suffixes``/``stem`` operations when no filename is +present and the Path is not at the root of the zipfile; Reworked glob for +performance and more correct matching behavior.