GH-125413: pathlib ABCs: use `scandir()` to speed up `glob()` (#126261)

Use the new `PathBase.scandir()` method in `PathBase.glob()`, which greatly
reduces the number of `PathBase.stat()` calls needed when globbing.

There are no user-facing changes, because the pathlib ABCs are still
private and `Path.glob()` doesn't use the implementation in its superclass.
This commit is contained in:
Barney Gale 2024-11-01 17:48:58 +00:00 committed by GitHub
parent 464a7a91d0
commit 68a51e0178
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 10 additions and 25 deletions

View File

@@ -364,12 +364,6 @@ class _GlobberBase:
"""
raise NotImplementedError
@staticmethod
def parse_entry(entry):
"""Returns the path of an entry yielded from scandir().
"""
raise NotImplementedError
# High-level methods
def compile(self, pat):
@@ -438,6 +432,7 @@ class _GlobberBase:
except OSError:
pass
else:
prefix = self.add_slash(path)
for entry in entries:
if match is None or match(entry.name):
if dir_only:
@@ -446,7 +441,7 @@ class _GlobberBase:
continue
except OSError:
continue
entry_path = self.parse_entry(entry)
entry_path = self.concat_path(prefix, entry.name)
if dir_only:
yield from select_next(entry_path, exists=True)
else:
@@ -495,6 +490,7 @@ class _GlobberBase:
except OSError:
pass
else:
prefix = self.add_slash(path)
for entry in entries:
is_dir = False
try:
@@ -504,7 +500,7 @@ class _GlobberBase:
pass
if is_dir or not dir_only:
entry_path = self.parse_entry(entry)
entry_path = self.concat_path(prefix, entry.name)
if match is None or match(str(entry_path), match_pos):
if dir_only:
yield from select_next(entry_path, exists=True)
@@ -533,7 +529,6 @@ class _StringGlobber(_GlobberBase):
"""
lexists = staticmethod(os.path.lexists)
scandir = staticmethod(os.scandir)
parse_entry = operator.attrgetter('path')
concat_path = operator.add
if os.name == 'nt':

View File

@@ -94,25 +94,13 @@ class PathGlobber(_GlobberBase):
lexists = operator.methodcaller('exists', follow_symlinks=False)
add_slash = operator.methodcaller('joinpath', '')
@staticmethod
def scandir(path):
"""Emulates os.scandir(), which returns an object that can be used as
a context manager. This method is called by walk() and glob().
"""
import contextlib
return contextlib.nullcontext(path.iterdir())
scandir = operator.methodcaller('scandir')
@staticmethod
def concat_path(path, text):
"""Appends text to the given path."""
return path.with_segments(path._raw_path + text)
@staticmethod
def parse_entry(entry):
"""Returns the path of an entry yielded from scandir()."""
return entry
class PurePathBase:
"""Base class for pure path objects.

View File

@@ -1633,8 +1633,10 @@ class DummyPathTest(DummyPurePathTest):
p.joinpath('linkA').symlink_to('fileA')
p.joinpath('brokenLink').symlink_to('non-existing')
p.joinpath('linkB').symlink_to('dirB', target_is_directory=True)
p.joinpath('dirA', 'linkC').symlink_to(parser.join('..', 'dirB'))
p.joinpath('dirB', 'linkD').symlink_to(parser.join('..', 'dirB'))
p.joinpath('dirA', 'linkC').symlink_to(
parser.join('..', 'dirB'), target_is_directory=True)
p.joinpath('dirB', 'linkD').symlink_to(
parser.join('..', 'dirB'), target_is_directory=True)
p.joinpath('brokenLinkLoop').symlink_to('brokenLinkLoop')
def tearDown(self):
@@ -2479,7 +2481,7 @@ class DummyPathTest(DummyPurePathTest):
if i % 2:
link.symlink_to(P(self.base, "dirE", "nonexistent"))
else:
link.symlink_to(P(self.base, "dirC"))
link.symlink_to(P(self.base, "dirC"), target_is_directory=True)
self.assertEqual(len(set(base.glob("*"))), 100)
self.assertEqual(len(set(base.glob("*/"))), 50)