GH-113225: Speed up `pathlib.Path.walk(top_down=False)` (#113693)

Use `_make_child_entry()` rather than `_make_child_relpath()` to retrieve
path objects for directories to visit. This saves the allocation of one
path object per directory in user subclasses of `PathBase`, and avoids a
second loop.

This trick does not apply when walking top-down, because users can affect
the walk by modifying *dirnames* in place, so the paths to visit must be
built from *dirnames* only after it has been yielded.

A side effect of this change is that, in bottom-up mode, the subdirectories
of each directory are visited in reverse order, so the visit order no longer
matches the order of the names in *dirnames*. I suspect this is fine, as the
order is arbitrary anyway.
This commit is contained in:
Barney Gale 2024-01-20 03:06:00 +00:00 committed by GitHub
parent 6313cdde58
commit 1e610fb05f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 7 additions and 4 deletions

View File

@ -820,6 +820,8 @@ class PathBase(PurePathBase):
with scandir_obj as scandir_it:
dirnames = []
filenames = []
if not top_down:
paths.append((path, dirnames, filenames))
for entry in scandir_it:
try:
is_dir = entry.is_dir(follow_symlinks=follow_symlinks)
@ -828,16 +830,15 @@ class PathBase(PurePathBase):
is_dir = False
if is_dir:
if not top_down:
paths.append(path._make_child_entry(entry))
dirnames.append(entry.name)
else:
filenames.append(entry.name)
if top_down:
yield path, dirnames, filenames
else:
paths.append((path, dirnames, filenames))
paths += [path._make_child_relpath(d) for d in reversed(dirnames)]
paths += [path._make_child_relpath(d) for d in reversed(dirnames)]
def absolute(self):
"""Return an absolute version of this path

View File

@ -0,0 +1,2 @@
Speed up :meth:`pathlib.Path.walk` by using :attr:`os.DirEntry.path` where
possible.