From 68a51e0178e86be8b697683fd108aa795f235507 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Fri, 1 Nov 2024 17:48:58 +0000 Subject: [PATCH] GH-125413: pathlib ABCs: use `scandir()` to speed up `glob()` (#126261) Use the new `PathBase.scandir()` method in `PathBase.glob()`, which greatly reduces the number of `PathBase.stat()` calls needed when globbing. There are no user-facing changes, because the pathlib ABCs are still private and `Path.glob()` doesn't use the implementation in its superclass. --- Lib/glob.py | 13 ++++--------- Lib/pathlib/_abc.py | 14 +------------- Lib/test/test_pathlib/test_pathlib_abc.py | 8 +++++--- 3 files changed, 10 insertions(+), 25 deletions(-) diff --git a/Lib/glob.py b/Lib/glob.py index 574e5ad51b6..ce9b3698888 100644 --- a/Lib/glob.py +++ b/Lib/glob.py @@ -364,12 +364,6 @@ class _GlobberBase: """ raise NotImplementedError - @staticmethod - def parse_entry(entry): - """Returns the path of an entry yielded from scandir(). - """ - raise NotImplementedError - # High-level methods def compile(self, pat): @@ -438,6 +432,7 @@ class _GlobberBase: except OSError: pass else: + prefix = self.add_slash(path) for entry in entries: if match is None or match(entry.name): if dir_only: @@ -446,7 +441,7 @@ class _GlobberBase: continue except OSError: continue - entry_path = self.parse_entry(entry) + entry_path = self.concat_path(prefix, entry.name) if dir_only: yield from select_next(entry_path, exists=True) else: @@ -495,6 +490,7 @@ class _GlobberBase: except OSError: pass else: + prefix = self.add_slash(path) for entry in entries: is_dir = False try: @@ -504,7 +500,7 @@ class _GlobberBase: pass if is_dir or not dir_only: - entry_path = self.parse_entry(entry) + entry_path = self.concat_path(prefix, entry.name) if match is None or match(str(entry_path), match_pos): if dir_only: yield from select_next(entry_path, exists=True) @@ -533,7 +529,6 @@ class _StringGlobber(_GlobberBase): """ lexists = staticmethod(os.path.lexists) scandir = staticmethod(os.scandir) - parse_entry = operator.attrgetter('path') concat_path = operator.add if os.name == 'nt': diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index dfff8b460d1..cc7c1991d0e 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -94,25 +94,13 @@ class PathGlobber(_GlobberBase): lexists = operator.methodcaller('exists', follow_symlinks=False) add_slash = operator.methodcaller('joinpath', '') - - @staticmethod - def scandir(path): - """Emulates os.scandir(), which returns an object that can be used as - a context manager. This method is called by walk() and glob(). - """ - import contextlib - return contextlib.nullcontext(path.iterdir()) + scandir = operator.methodcaller('scandir') @staticmethod def concat_path(path, text): """Appends text to the given path.""" return path.with_segments(path._raw_path + text) - @staticmethod - def parse_entry(entry): - """Returns the path of an entry yielded from scandir().""" - return entry - class PurePathBase: """Base class for pure path objects. diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index 11e34f5d378..4596d0b0e26 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -1633,8 +1633,10 @@ class DummyPathTest(DummyPurePathTest): p.joinpath('linkA').symlink_to('fileA') p.joinpath('brokenLink').symlink_to('non-existing') p.joinpath('linkB').symlink_to('dirB', target_is_directory=True) - p.joinpath('dirA', 'linkC').symlink_to(parser.join('..', 'dirB')) - p.joinpath('dirB', 'linkD').symlink_to(parser.join('..', 'dirB')) + p.joinpath('dirA', 'linkC').symlink_to( + parser.join('..', 'dirB'), target_is_directory=True) + p.joinpath('dirB', 'linkD').symlink_to( + parser.join('..', 'dirB'), target_is_directory=True) p.joinpath('brokenLinkLoop').symlink_to('brokenLinkLoop') def tearDown(self): @@ -2479,7 +2481,7 @@ class DummyPathTest(DummyPurePathTest): if i % 2: link.symlink_to(P(self.base, "dirE", "nonexistent")) else: - link.symlink_to(P(self.base, "dirC")) + link.symlink_to(P(self.base, "dirC"), target_is_directory=True) self.assertEqual(len(set(base.glob("*"))), 100) self.assertEqual(len(set(base.glob("*/"))), 50)