mirror of https://github.com/python/cpython
GH-115060: Speed up `pathlib.Path.glob()` by not scanning literal parts (#117732)
Don't bother calling `os.scandir()` to scan for literal pattern segments, like `foo` in `foo/*.py`. Instead, append the segment(s) as-is and call through to the next selector with `exists=False`, which signals that the path might not exist. Subsequent selectors will call `os.scandir()` or `os.lstat()` to filter out missing paths as needed.
This commit is contained in:
parent
069de14cb9
commit
0eb52f5f26
22
Lib/glob.py
22
Lib/glob.py
|
@ -331,9 +331,10 @@ class _Globber:
|
||||||
"""Class providing shell-style pattern matching and globbing.
|
"""Class providing shell-style pattern matching and globbing.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, sep, case_sensitive, recursive=False):
|
def __init__(self, sep, case_sensitive, case_pedantic=False, recursive=False):
|
||||||
self.sep = sep
|
self.sep = sep
|
||||||
self.case_sensitive = case_sensitive
|
self.case_sensitive = case_sensitive
|
||||||
|
self.case_pedantic = case_pedantic
|
||||||
self.recursive = recursive
|
self.recursive = recursive
|
||||||
|
|
||||||
# Low-level methods
|
# Low-level methods
|
||||||
|
@ -373,6 +374,8 @@ class _Globber:
|
||||||
selector = self.recursive_selector
|
selector = self.recursive_selector
|
||||||
elif part in _special_parts:
|
elif part in _special_parts:
|
||||||
selector = self.special_selector
|
selector = self.special_selector
|
||||||
|
elif not self.case_pedantic and magic_check.search(part) is None:
|
||||||
|
selector = self.literal_selector
|
||||||
else:
|
else:
|
||||||
selector = self.wildcard_selector
|
selector = self.wildcard_selector
|
||||||
return selector(part, parts)
|
return selector(part, parts)
|
||||||
|
@ -387,6 +390,23 @@ class _Globber:
|
||||||
return select_next(path, exists)
|
return select_next(path, exists)
|
||||||
return select_special
|
return select_special
|
||||||
|
|
||||||
|
def literal_selector(self, part, parts):
|
||||||
|
"""Returns a function that selects a literal descendant of a path.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Optimization: consume and join any subsequent literal parts here,
|
||||||
|
# rather than leaving them for the next selector. This reduces the
|
||||||
|
# number of string concatenation operations and calls to add_slash().
|
||||||
|
while parts and magic_check.search(parts[-1]) is None:
|
||||||
|
part += self.sep + parts.pop()
|
||||||
|
|
||||||
|
select_next = self.selector(parts)
|
||||||
|
|
||||||
|
def select_literal(path, exists=False):
|
||||||
|
path = self.concat_path(self.add_slash(path), part)
|
||||||
|
return select_next(path, exists=False)
|
||||||
|
return select_literal
|
||||||
|
|
||||||
def wildcard_selector(self, part, parts):
|
def wildcard_selector(self, part, parts):
|
||||||
"""Returns a function that selects direct children of a given path,
|
"""Returns a function that selects direct children of a given path,
|
||||||
filtering by pattern.
|
filtering by pattern.
|
||||||
|
|
|
@ -686,8 +686,14 @@ class PathBase(PurePathBase):
|
||||||
def _glob_selector(self, parts, case_sensitive, recurse_symlinks):
|
def _glob_selector(self, parts, case_sensitive, recurse_symlinks):
|
||||||
if case_sensitive is None:
|
if case_sensitive is None:
|
||||||
case_sensitive = _is_case_sensitive(self.parser)
|
case_sensitive = _is_case_sensitive(self.parser)
|
||||||
|
case_pedantic = False
|
||||||
|
else:
|
||||||
|
# The user has expressed a case sensitivity choice, but we don't
|
||||||
|
# know the case sensitivity of the underlying filesystem, so we
|
||||||
|
# must use scandir() for everything, including non-wildcard parts.
|
||||||
|
case_pedantic = True
|
||||||
recursive = True if recurse_symlinks else glob._no_recurse_symlinks
|
recursive = True if recurse_symlinks else glob._no_recurse_symlinks
|
||||||
globber = self._globber(self.parser.sep, case_sensitive, recursive)
|
globber = self._globber(self.parser.sep, case_sensitive, case_pedantic, recursive)
|
||||||
return globber.selector(parts)
|
return globber.selector(parts)
|
||||||
|
|
||||||
def glob(self, pattern, *, case_sensitive=None, recurse_symlinks=True):
|
def glob(self, pattern, *, case_sensitive=None, recurse_symlinks=True):
|
||||||
|
|
|
@ -1429,10 +1429,10 @@ class DummyPath(PathBase):
|
||||||
return "{}({!r})".format(self.__class__.__name__, self.as_posix())
|
return "{}({!r})".format(self.__class__.__name__, self.as_posix())
|
||||||
|
|
||||||
def stat(self, *, follow_symlinks=True):
|
def stat(self, *, follow_symlinks=True):
|
||||||
if follow_symlinks:
|
if follow_symlinks or self.name in ('', '.', '..'):
|
||||||
path = str(self.resolve())
|
path = str(self.resolve(strict=True))
|
||||||
else:
|
else:
|
||||||
path = str(self.parent.resolve() / self.name)
|
path = str(self.parent.resolve(strict=True) / self.name)
|
||||||
if path in self._files:
|
if path in self._files:
|
||||||
st_mode = stat.S_IFREG
|
st_mode = stat.S_IFREG
|
||||||
elif path in self._directories:
|
elif path in self._directories:
|
||||||
|
@ -1741,8 +1741,9 @@ class DummyPathTest(DummyPurePathTest):
|
||||||
def test_glob_posix(self):
|
def test_glob_posix(self):
|
||||||
P = self.cls
|
P = self.cls
|
||||||
p = P(self.base)
|
p = P(self.base)
|
||||||
|
q = p / "FILEa"
|
||||||
given = set(p.glob("FILEa"))
|
given = set(p.glob("FILEa"))
|
||||||
expect = set()
|
expect = {q} if q.exists() else set()
|
||||||
self.assertEqual(given, expect)
|
self.assertEqual(given, expect)
|
||||||
self.assertEqual(set(p.glob("FILEa*")), set())
|
self.assertEqual(set(p.glob("FILEa*")), set())
|
||||||
|
|
||||||
|
@ -1753,8 +1754,6 @@ class DummyPathTest(DummyPurePathTest):
|
||||||
self.assertEqual(set(p.glob("FILEa")), { P(self.base, "fileA") })
|
self.assertEqual(set(p.glob("FILEa")), { P(self.base, "fileA") })
|
||||||
self.assertEqual(set(p.glob("*a\\")), { P(self.base, "dirA/") })
|
self.assertEqual(set(p.glob("*a\\")), { P(self.base, "dirA/") })
|
||||||
self.assertEqual(set(p.glob("F*a")), { P(self.base, "fileA") })
|
self.assertEqual(set(p.glob("F*a")), { P(self.base, "fileA") })
|
||||||
self.assertEqual(set(map(str, p.glob("FILEa"))), {f"{p}\\fileA"})
|
|
||||||
self.assertEqual(set(map(str, p.glob("F*a"))), {f"{p}\\fileA"})
|
|
||||||
|
|
||||||
def test_glob_empty_pattern(self):
|
def test_glob_empty_pattern(self):
|
||||||
P = self.cls
|
P = self.cls
|
||||||
|
@ -1857,8 +1856,9 @@ class DummyPathTest(DummyPurePathTest):
|
||||||
def test_rglob_posix(self):
|
def test_rglob_posix(self):
|
||||||
P = self.cls
|
P = self.cls
|
||||||
p = P(self.base, "dirC")
|
p = P(self.base, "dirC")
|
||||||
|
q = p / "dirD" / "FILEd"
|
||||||
given = set(p.rglob("FILEd"))
|
given = set(p.rglob("FILEd"))
|
||||||
expect = set()
|
expect = {q} if q.exists() else set()
|
||||||
self.assertEqual(given, expect)
|
self.assertEqual(given, expect)
|
||||||
self.assertEqual(set(p.rglob("FILEd*")), set())
|
self.assertEqual(set(p.rglob("FILEd*")), set())
|
||||||
|
|
||||||
|
@ -1868,7 +1868,6 @@ class DummyPathTest(DummyPurePathTest):
|
||||||
p = P(self.base, "dirC")
|
p = P(self.base, "dirC")
|
||||||
self.assertEqual(set(p.rglob("FILEd")), { P(self.base, "dirC/dirD/fileD") })
|
self.assertEqual(set(p.rglob("FILEd")), { P(self.base, "dirC/dirD/fileD") })
|
||||||
self.assertEqual(set(p.rglob("*\\")), { P(self.base, "dirC/dirD/") })
|
self.assertEqual(set(p.rglob("*\\")), { P(self.base, "dirC/dirD/") })
|
||||||
self.assertEqual(set(map(str, p.rglob("FILEd"))), {f"{p}\\dirD\\fileD"})
|
|
||||||
|
|
||||||
@needs_symlinks
|
@needs_symlinks
|
||||||
def test_rglob_recurse_symlinks_common(self):
|
def test_rglob_recurse_symlinks_common(self):
|
||||||
|
@ -1931,7 +1930,11 @@ class DummyPathTest(DummyPurePathTest):
|
||||||
self.assertEqual(set(p.glob("dirA/../file*")), { P(self.base, "dirA/../fileA") })
|
self.assertEqual(set(p.glob("dirA/../file*")), { P(self.base, "dirA/../fileA") })
|
||||||
self.assertEqual(set(p.glob("dirA/../file*/..")), set())
|
self.assertEqual(set(p.glob("dirA/../file*/..")), set())
|
||||||
self.assertEqual(set(p.glob("../xyzzy")), set())
|
self.assertEqual(set(p.glob("../xyzzy")), set())
|
||||||
self.assertEqual(set(p.glob("xyzzy/..")), set())
|
if self.cls.parser is posixpath:
|
||||||
|
self.assertEqual(set(p.glob("xyzzy/..")), set())
|
||||||
|
else:
|
||||||
|
# ".." segments are normalized first on Windows, so this path is stat()able.
|
||||||
|
self.assertEqual(set(p.glob("xyzzy/..")), { P(self.base, "xyzzy", "..") })
|
||||||
self.assertEqual(set(p.glob("/".join([".."] * 50))), { P(self.base, *[".."] * 50)})
|
self.assertEqual(set(p.glob("/".join([".."] * 50))), { P(self.base, *[".."] * 50)})
|
||||||
|
|
||||||
@needs_symlinks
|
@needs_symlinks
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
Speed up :meth:`pathlib.Path.glob` by not scanning directories for
|
||||||
|
non-wildcard pattern segments.
|
Loading…
Reference in New Issue