mirror of https://github.com/python/cpython
GH-126363: Speed up pattern parsing in `pathlib.Path.glob()` (#126364)
The implementation of `Path.glob()` does rather a hacky thing: it calls `self.with_segments()` to convert the given pattern to a `Path` object, and then peeks at the private `_raw_path` attribute to see if pathlib removed a trailing slash from the pattern. In this patch, we make `glob()` use a new `_parse_pattern()` classmethod that splits the pattern into parts while preserving information about any trailing slash. This skips the cost of creating a `Path` object, and avoids some path anchor normalization, which makes `Path.glob()` slightly faster. But mostly it's about making the code less naughty. Co-authored-by: Tomas R. <tomas.roun8@gmail.com>
This commit is contained in:
parent
2e95c5ba3b
commit
9b7294c3a5
|
@ -274,6 +274,31 @@ class PurePath(PurePathBase):
|
||||||
root = sep
|
root = sep
|
||||||
return drv, root, [x for x in rel.split(sep) if x and x != '.']
|
return drv, root, [x for x in rel.split(sep) if x and x != '.']
|
||||||
|
|
||||||
|
@classmethod
def _parse_pattern(cls, pattern):
    """Parse a glob pattern into a list of parts.

    This is much like _parse_path, except:

    - Rather than normalizing and returning the drive and root, we raise
      NotImplementedError if either is present.
    - If the pattern has no real parts, we raise ValueError.
    - If the pattern ends in a slash, then a final empty part is added.
    """
    drv, root, rel = cls.parser.splitroot(pattern)
    if root or drv:
        raise NotImplementedError("Non-relative patterns are unsupported")
    sep = cls.parser.sep
    altsep = cls.parser.altsep
    if altsep:
        # Fold the alternate separator into the primary one so that a
        # single split() and endswith() below cover both spellings.
        rel = rel.replace(altsep, sep)
    # Empty segments (from doubled or trailing separators) and '.'
    # segments are not real parts and are dropped.
    parts = [x for x in rel.split(sep) if x and x != '.']
    if not parts:
        raise ValueError(f"Unacceptable pattern: {str(pattern)!r}")
    elif rel.endswith(sep):
        # GH-65238: preserve trailing slash in glob patterns.
        parts.append('')
    return parts
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def _raw_path(self):
|
def _raw_path(self):
|
||||||
"""The joined but unnormalized path."""
|
"""The joined but unnormalized path."""
|
||||||
|
@ -641,17 +666,7 @@ class Path(PathBase, PurePath):
|
||||||
kind, including directories) matching the given relative pattern.
|
kind, including directories) matching the given relative pattern.
|
||||||
"""
|
"""
|
||||||
sys.audit("pathlib.Path.glob", self, pattern)
|
sys.audit("pathlib.Path.glob", self, pattern)
|
||||||
if not isinstance(pattern, PurePath):
|
parts = self._parse_pattern(pattern)
|
||||||
pattern = self.with_segments(pattern)
|
|
||||||
if pattern.anchor:
|
|
||||||
raise NotImplementedError("Non-relative patterns are unsupported")
|
|
||||||
parts = pattern._tail.copy()
|
|
||||||
if not parts:
|
|
||||||
raise ValueError("Unacceptable pattern: {!r}".format(pattern))
|
|
||||||
raw = pattern._raw_path
|
|
||||||
if raw[-1] in (self.parser.sep, self.parser.altsep):
|
|
||||||
# GH-65238: pathlib doesn't preserve trailing slash. Add it back.
|
|
||||||
parts.append('')
|
|
||||||
select = self._glob_selector(parts[::-1], case_sensitive, recurse_symlinks)
|
select = self._glob_selector(parts[::-1], case_sensitive, recurse_symlinks)
|
||||||
root = str(self)
|
root = str(self)
|
||||||
paths = select(root)
|
paths = select(root)
|
||||||
|
@ -672,9 +687,7 @@ class Path(PathBase, PurePath):
|
||||||
this subtree.
|
this subtree.
|
||||||
"""
|
"""
|
||||||
sys.audit("pathlib.Path.rglob", self, pattern)
|
sys.audit("pathlib.Path.rglob", self, pattern)
|
||||||
if not isinstance(pattern, PurePath):
|
pattern = self.parser.join('**', pattern)
|
||||||
pattern = self.with_segments(pattern)
|
|
||||||
pattern = '**' / pattern
|
|
||||||
return self.glob(pattern, case_sensitive=case_sensitive, recurse_symlinks=recurse_symlinks)
|
return self.glob(pattern, case_sensitive=case_sensitive, recurse_symlinks=recurse_symlinks)
|
||||||
|
|
||||||
def walk(self, top_down=True, on_error=None, follow_symlinks=False):
|
def walk(self, top_down=True, on_error=None, follow_symlinks=False):
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
Speed up pattern parsing in :meth:`pathlib.Path.glob` by skipping creation
|
||||||
|
of a :class:`pathlib.Path` object for the pattern.
|
Loading…
Reference in New Issue