mirror of https://github.com/python/cpython
GH-117586: Speed up `pathlib.Path.walk()` by working with strings (#117726)
Move `pathlib.Path.walk()` implementation into `glob._Globber`. The new `glob._Globber.walk()` classmethod works with strings internally, which is a little faster than generating `Path` objects and keeping them normalized. The `pathlib.Path.walk()` method converts the strings back to path objects. In the private pathlib ABCs, our existing subclass of `_Globber` ensures that `PathBase` instances are used throughout. Follow-up to #117589.
This commit is contained in:
parent
6258844c27
commit
0cc71bde00
37
Lib/glob.py
37
Lib/glob.py
|
@ -498,3 +498,40 @@ class _Globber:
|
|||
yield path
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
@classmethod
def walk(cls, root, top_down, on_error, follow_symlinks):
    """Walk the directory tree from the given root, similar to os.walk().

    Yields ``(path, dirnames, filenames)`` tuples, where *dirnames* and
    *filenames* are lists of entry names found directly in *path*.

    root -- starting directory, in whatever form cls.scandir() accepts.
    top_down -- if true, a directory is yielded before its
        subdirectories; otherwise it is yielded after all of them.
    on_error -- None, or a callable invoked with the OSError raised
        while scanning a directory.
    follow_symlinks -- forwarded to each entry's is_dir().

    A directory whose scan raises OSError is reported to *on_error* (if
    given) and then skipped entirely, in both traversal orders: it is
    not yielded and nothing beneath it is visited. An entry whose
    is_dir() check raises OSError is listed in *filenames*.
    """
    # Explicit stack instead of recursion. It holds directories still to
    # be scanned and, in bottom-up mode, result tuples whose yield is
    # deferred until everything pushed above them has been processed.
    paths = [root]
    while paths:
        path = paths.pop()
        if isinstance(path, tuple):
            # Deferred bottom-up result: its subdirectories are done.
            yield path
            continue
        dirnames = []
        filenames = []
        if not top_down:
            # The lists are filled in below; the tuple shares them.
            paths.append((path, dirnames, filenames))
        try:
            with cls.scandir(path) as scandir_it:
                for entry in scandir_it:
                    name = entry.name
                    try:
                        if entry.is_dir(follow_symlinks=follow_symlinks):
                            if not top_down:
                                paths.append(cls.parse_entry(entry))
                            dirnames.append(name)
                        else:
                            filenames.append(name)
                    except OSError:
                        # Type could not be determined; report as a file.
                        filenames.append(name)
        except OSError as error:
            if on_error is not None:
                on_error(error)
            if not top_down:
                # Drop the children queued so far together with the
                # deferred tuple, so a directory that failed part-way
                # through is not yielded with incomplete listings.
                while not isinstance(paths.pop(), tuple):
                    pass
            continue
        if top_down:
            yield path, dirnames, filenames
            if dirnames:
                prefix = cls.add_slash(path)
                # Reversed so that pop() visits them in listing order.
                paths += [cls.concat_path(prefix, d) for d in reversed(dirnames)]
|
||||
|
|
|
@ -586,18 +586,6 @@ class Path(_abc.PathBase, PurePath):
|
|||
"""
|
||||
return (self._make_child_relpath(name) for name in os.listdir(self))
|
||||
|
||||
def _scandir(self):
|
||||
return os.scandir(self)
|
||||
|
||||
def _make_child_direntry(self, entry):
|
||||
# Transform an entry yielded from _scandir() into a path object.
|
||||
path_str = entry.name if str(self) == '.' else entry.path
|
||||
path = self.with_segments(path_str)
|
||||
path._str = path_str
|
||||
path._drv = self.drive
|
||||
path._root = self.root
|
||||
path._tail_cached = self._tail + [entry.name]
|
||||
return path
|
||||
|
||||
def _make_child_relpath(self, name):
|
||||
if not name:
|
||||
|
@ -663,8 +651,12 @@ class Path(_abc.PathBase, PurePath):
|
|||
def walk(self, top_down=True, on_error=None, follow_symlinks=False):
|
||||
"""Walk the directory tree from this directory, similar to os.walk()."""
|
||||
sys.audit("pathlib.Path.walk", self, on_error, follow_symlinks)
|
||||
return _abc.PathBase.walk(
|
||||
self, top_down=top_down, on_error=on_error, follow_symlinks=follow_symlinks)
|
||||
root_dir = str(self)
|
||||
results = self._globber.walk(root_dir, top_down, on_error, follow_symlinks)
|
||||
for path_str, dirnames, filenames in results:
|
||||
if root_dir == '.':
|
||||
path_str = path_str[2:]
|
||||
yield self._from_parsed_string(path_str), dirnames, filenames
|
||||
|
||||
def absolute(self):
|
||||
"""Return an absolute version of this path
|
||||
|
|
|
@ -45,9 +45,15 @@ def _is_case_sensitive(parser):
|
|||
|
||||
class Globber(glob._Globber):
|
||||
lstat = operator.methodcaller('lstat')
|
||||
scandir = operator.methodcaller('_scandir')
|
||||
add_slash = operator.methodcaller('joinpath', '')
|
||||
|
||||
@staticmethod
|
||||
def scandir(path):
|
||||
# Emulate os.scandir(), which returns an object that can be used as a
|
||||
# context manager. This method is called by walk() and glob().
|
||||
from contextlib import nullcontext
|
||||
return nullcontext(path.iterdir())
|
||||
|
||||
@staticmethod
|
||||
def concat_path(path, text):
|
||||
"""Appends text to the given path.
|
||||
|
@ -677,20 +683,6 @@ class PathBase(PurePathBase):
|
|||
"""
|
||||
raise UnsupportedOperation(self._unsupported_msg('iterdir()'))
|
||||
|
||||
def _scandir(self):
|
||||
# Emulate os.scandir(), which returns an object that can be used as a
|
||||
# context manager. This method is called by walk() and glob().
|
||||
from contextlib import nullcontext
|
||||
return nullcontext(self.iterdir())
|
||||
|
||||
def _make_child_direntry(self, entry):
|
||||
# Transform an entry yielded from _scandir() into a path object.
|
||||
# PathBase._scandir() yields PathBase objects, so this is a no-op.
|
||||
return entry
|
||||
|
||||
def _make_child_relpath(self, name):
|
||||
return self.joinpath(name)
|
||||
|
||||
def _glob_selector(self, parts, case_sensitive, recurse_symlinks):
|
||||
if case_sensitive is None:
|
||||
case_sensitive = _is_case_sensitive(self.parser)
|
||||
|
@ -724,48 +716,7 @@ class PathBase(PurePathBase):
|
|||
|
||||
def walk(self, top_down=True, on_error=None, follow_symlinks=False):
|
||||
"""Walk the directory tree from this directory, similar to os.walk()."""
|
||||
paths = [self]
|
||||
|
||||
while paths:
|
||||
path = paths.pop()
|
||||
if isinstance(path, tuple):
|
||||
yield path
|
||||
continue
|
||||
|
||||
# We may not have read permission for self, in which case we can't
|
||||
# get a list of the files the directory contains. os.walk()
|
||||
# always suppressed the exception in that instance, rather than
|
||||
# blow up for a minor reason when (say) a thousand readable
|
||||
# directories are still left to visit. That logic is copied here.
|
||||
try:
|
||||
scandir_obj = path._scandir()
|
||||
except OSError as error:
|
||||
if on_error is not None:
|
||||
on_error(error)
|
||||
continue
|
||||
|
||||
with scandir_obj as scandir_it:
|
||||
dirnames = []
|
||||
filenames = []
|
||||
if not top_down:
|
||||
paths.append((path, dirnames, filenames))
|
||||
for entry in scandir_it:
|
||||
try:
|
||||
is_dir = entry.is_dir(follow_symlinks=follow_symlinks)
|
||||
except OSError:
|
||||
# Carried over from os.path.isdir().
|
||||
is_dir = False
|
||||
|
||||
if is_dir:
|
||||
if not top_down:
|
||||
paths.append(path._make_child_direntry(entry))
|
||||
dirnames.append(entry.name)
|
||||
else:
|
||||
filenames.append(entry.name)
|
||||
|
||||
if top_down:
|
||||
yield path, dirnames, filenames
|
||||
paths += [path._make_child_relpath(d) for d in reversed(dirnames)]
|
||||
return self._globber.walk(self, top_down, on_error, follow_symlinks)
|
||||
|
||||
def absolute(self):
|
||||
"""Return an absolute version of this path
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Speed up :meth:`pathlib.Path.walk` by working with strings internally.
|
Loading…
Reference in New Issue