mirror of https://github.com/python/cpython
GH-101362: Omit path anchor from `pathlib.PurePath()._parts` (GH-102476)
Improve performance of path construction by skipping the addition of the path anchor (`drive + root`) to the internal `_parts` list. Rename this attribute to `_tail` for clarity.
This commit is contained in:
parent
0a675f4bb5
commit
2c673d5e93
171
Lib/pathlib.py
171
Lib/pathlib.py
|
@ -210,20 +210,17 @@ class _RecursiveWildcardSelector(_Selector):
|
|||
class _PathParents(Sequence):
|
||||
"""This object provides sequence-like access to the logical ancestors
|
||||
of a path. Don't try to construct it yourself."""
|
||||
__slots__ = ('_pathcls', '_drv', '_root', '_parts')
|
||||
__slots__ = ('_pathcls', '_drv', '_root', '_tail')
|
||||
|
||||
def __init__(self, path):
|
||||
# We don't store the instance to avoid reference cycles
|
||||
self._pathcls = type(path)
|
||||
self._drv = path.drive
|
||||
self._root = path.root
|
||||
self._parts = path._parts
|
||||
self._tail = path._tail
|
||||
|
||||
def __len__(self):
|
||||
if self._drv or self._root:
|
||||
return len(self._parts) - 1
|
||||
else:
|
||||
return len(self._parts)
|
||||
return len(self._tail)
|
||||
|
||||
def __getitem__(self, idx):
|
||||
if isinstance(idx, slice):
|
||||
|
@ -234,7 +231,7 @@ class _PathParents(Sequence):
|
|||
if idx < 0:
|
||||
idx += len(self)
|
||||
return self._pathcls._from_parsed_parts(self._drv, self._root,
|
||||
self._parts[:-idx - 1])
|
||||
self._tail[:-idx - 1])
|
||||
|
||||
def __repr__(self):
|
||||
return "<{}.parents>".format(self._pathcls.__name__)
|
||||
|
@ -249,9 +246,41 @@ class PurePath(object):
|
|||
PureWindowsPath object. You can also instantiate either of these classes
|
||||
directly, regardless of your system.
|
||||
"""
|
||||
|
||||
__slots__ = (
|
||||
'_raw_path', '_drv', '_root', '_parts_cached',
|
||||
'_str', '_hash', '_parts_tuple', '_parts_normcase_cached',
|
||||
# The `_raw_path` slot stores an unnormalized string path. This is set
|
||||
# in the `__init__()` method.
|
||||
'_raw_path',
|
||||
|
||||
# The `_drv`, `_root` and `_tail_cached` slots store parsed and
|
||||
# normalized parts of the path. They are set when any of the `drive`,
|
||||
# `root` or `_tail` properties are accessed for the first time. The
|
||||
# three-part division corresponds to the result of
|
||||
# `os.path.splitroot()`, except that the tail is further split on path
|
||||
# separators (i.e. it is a list of strings), and that the root and
|
||||
# tail are normalized.
|
||||
'_drv', '_root', '_tail_cached',
|
||||
|
||||
# The `_str` slot stores the string representation of the path,
|
||||
# computed from the drive, root and tail when `__str__()` is called
|
||||
# for the first time. It's used to implement `_str_normcase`.
|
||||
'_str',
|
||||
|
||||
# The `_str_normcase_cached` slot stores the string path with
|
||||
# normalized case. It is set when the `_str_normcase` property is
|
||||
# accessed for the first time. It's used to implement `__eq__()`,
|
||||
# `__hash__()`, and `_parts_normcase`.
|
||||
'_str_normcase_cached',
|
||||
|
||||
# The `_parts_normcase_cached` slot stores the case-normalized
|
||||
# string path after splitting on path separators. It's set when the
|
||||
# `_parts_normcase` property is accessed for the first time. It's used
|
||||
# to implement comparison methods like `__lt__()`.
|
||||
'_parts_normcase_cached',
|
||||
|
||||
# The `_hash` slot stores the hash of the case-normalized string
|
||||
# path. It's set when `__hash__()` is called for the first time.
|
||||
'_hash',
|
||||
)
|
||||
_flavour = os.path
|
||||
|
||||
|
@ -277,10 +306,7 @@ class PurePath(object):
|
|||
path = os.fspath(args[0])
|
||||
else:
|
||||
path = self._flavour.join(*args)
|
||||
if isinstance(path, str):
|
||||
# Force-cast str subclasses to str (issue #21127)
|
||||
path = str(path)
|
||||
else:
|
||||
if not isinstance(path, str):
|
||||
raise TypeError(
|
||||
"argument should be a str or an os.PathLike "
|
||||
"object where __fspath__ returns a str, "
|
||||
|
@ -299,33 +325,32 @@ class PurePath(object):
|
|||
if drv.startswith(sep):
|
||||
# pathlib assumes that UNC paths always have a root.
|
||||
root = sep
|
||||
unfiltered_parsed = [drv + root] + rel.split(sep)
|
||||
parsed = [sys.intern(x) for x in unfiltered_parsed if x and x != '.']
|
||||
parsed = [sys.intern(str(x)) for x in rel.split(sep) if x and x != '.']
|
||||
return drv, root, parsed
|
||||
|
||||
def _load_parts(self):
|
||||
drv, root, parts = self._parse_path(self._raw_path)
|
||||
drv, root, tail = self._parse_path(self._raw_path)
|
||||
self._drv = drv
|
||||
self._root = root
|
||||
self._parts_cached = parts
|
||||
self._tail_cached = tail
|
||||
|
||||
@classmethod
|
||||
def _from_parsed_parts(cls, drv, root, parts):
|
||||
path = cls._format_parsed_parts(drv, root, parts)
|
||||
def _from_parsed_parts(cls, drv, root, tail):
|
||||
path = cls._format_parsed_parts(drv, root, tail)
|
||||
self = cls(path)
|
||||
self._str = path or '.'
|
||||
self._drv = drv
|
||||
self._root = root
|
||||
self._parts_cached = parts
|
||||
self._tail_cached = tail
|
||||
return self
|
||||
|
||||
@classmethod
|
||||
def _format_parsed_parts(cls, drv, root, parts):
|
||||
def _format_parsed_parts(cls, drv, root, tail):
|
||||
if drv or root:
|
||||
return drv + root + cls._flavour.sep.join(parts[1:])
|
||||
elif parts and cls._flavour.splitdrive(parts[0])[0]:
|
||||
parts = ['.'] + parts
|
||||
return cls._flavour.sep.join(parts)
|
||||
return drv + root + cls._flavour.sep.join(tail)
|
||||
elif tail and cls._flavour.splitdrive(tail[0])[0]:
|
||||
tail = ['.'] + tail
|
||||
return cls._flavour.sep.join(tail)
|
||||
|
||||
def __str__(self):
|
||||
"""Return the string representation of the path, suitable for
|
||||
|
@ -334,7 +359,7 @@ class PurePath(object):
|
|||
return self._str
|
||||
except AttributeError:
|
||||
self._str = self._format_parsed_parts(self.drive, self.root,
|
||||
self._parts) or '.'
|
||||
self._tail) or '.'
|
||||
return self._str
|
||||
|
||||
def __fspath__(self):
|
||||
|
@ -374,25 +399,34 @@ class PurePath(object):
|
|||
path = str(self)
|
||||
return prefix + urlquote_from_bytes(os.fsencode(path))
|
||||
|
||||
@property
|
||||
def _str_normcase(self):
|
||||
# String with normalized case, for hashing and equality checks
|
||||
try:
|
||||
return self._str_normcase_cached
|
||||
except AttributeError:
|
||||
self._str_normcase_cached = self._flavour.normcase(str(self))
|
||||
return self._str_normcase_cached
|
||||
|
||||
@property
|
||||
def _parts_normcase(self):
|
||||
# Cached parts with normalized case, for hashing and comparison.
|
||||
# Cached parts with normalized case, for comparisons.
|
||||
try:
|
||||
return self._parts_normcase_cached
|
||||
except AttributeError:
|
||||
self._parts_normcase_cached = [self._flavour.normcase(p) for p in self._parts]
|
||||
self._parts_normcase_cached = self._str_normcase.split(self._flavour.sep)
|
||||
return self._parts_normcase_cached
|
||||
|
||||
def __eq__(self, other):
|
||||
if not isinstance(other, PurePath):
|
||||
return NotImplemented
|
||||
return self._parts_normcase == other._parts_normcase and self._flavour is other._flavour
|
||||
return self._str_normcase == other._str_normcase and self._flavour is other._flavour
|
||||
|
||||
def __hash__(self):
|
||||
try:
|
||||
return self._hash
|
||||
except AttributeError:
|
||||
self._hash = hash(tuple(self._parts_normcase))
|
||||
self._hash = hash(self._str_normcase)
|
||||
return self._hash
|
||||
|
||||
def __lt__(self, other):
|
||||
|
@ -434,12 +468,12 @@ class PurePath(object):
|
|||
return self._root
|
||||
|
||||
@property
|
||||
def _parts(self):
|
||||
def _tail(self):
|
||||
try:
|
||||
return self._parts_cached
|
||||
return self._tail_cached
|
||||
except AttributeError:
|
||||
self._load_parts()
|
||||
return self._parts_cached
|
||||
return self._tail_cached
|
||||
|
||||
@property
|
||||
def anchor(self):
|
||||
|
@ -450,10 +484,10 @@ class PurePath(object):
|
|||
@property
|
||||
def name(self):
|
||||
"""The final path component, if any."""
|
||||
parts = self._parts
|
||||
if len(parts) == (1 if (self.drive or self.root) else 0):
|
||||
tail = self._tail
|
||||
if not tail:
|
||||
return ''
|
||||
return parts[-1]
|
||||
return tail[-1]
|
||||
|
||||
@property
|
||||
def suffix(self):
|
||||
|
@ -501,7 +535,7 @@ class PurePath(object):
|
|||
if drv or root or not tail or f.sep in tail or (f.altsep and f.altsep in tail):
|
||||
raise ValueError("Invalid name %r" % (name))
|
||||
return self._from_parsed_parts(self.drive, self.root,
|
||||
self._parts[:-1] + [name])
|
||||
self._tail[:-1] + [name])
|
||||
|
||||
def with_stem(self, stem):
|
||||
"""Return a new path with the stem changed."""
|
||||
|
@ -526,7 +560,7 @@ class PurePath(object):
|
|||
else:
|
||||
name = name[:-len(old_suffix)] + suffix
|
||||
return self._from_parsed_parts(self.drive, self.root,
|
||||
self._parts[:-1] + [name])
|
||||
self._tail[:-1] + [name])
|
||||
|
||||
def relative_to(self, other, /, *_deprecated, walk_up=False):
|
||||
"""Return the relative path to another path identified by the passed
|
||||
|
@ -551,7 +585,7 @@ class PurePath(object):
|
|||
raise ValueError(f"{str(self)!r} and {str(other)!r} have different anchors")
|
||||
if step and not walk_up:
|
||||
raise ValueError(f"{str(self)!r} is not in the subpath of {str(other)!r}")
|
||||
parts = ('..',) * step + self.parts[len(path.parts):]
|
||||
parts = ['..'] * step + self._tail[len(path._tail):]
|
||||
return path_cls(*parts)
|
||||
|
||||
def is_relative_to(self, other, /, *_deprecated):
|
||||
|
@ -570,13 +604,10 @@ class PurePath(object):
|
|||
def parts(self):
|
||||
"""An object providing sequence-like access to the
|
||||
components in the filesystem path."""
|
||||
# We cache the tuple to avoid building a new one each time .parts
|
||||
# is accessed. XXX is this necessary?
|
||||
try:
|
||||
return self._parts_tuple
|
||||
except AttributeError:
|
||||
self._parts_tuple = tuple(self._parts)
|
||||
return self._parts_tuple
|
||||
if self.drive or self.root:
|
||||
return (self.drive + self.root,) + tuple(self._tail)
|
||||
else:
|
||||
return tuple(self._tail)
|
||||
|
||||
def joinpath(self, *args):
|
||||
"""Combine this path with one or several arguments, and return a
|
||||
|
@ -603,10 +634,10 @@ class PurePath(object):
|
|||
"""The logical parent of the path."""
|
||||
drv = self.drive
|
||||
root = self.root
|
||||
parts = self._parts
|
||||
if len(parts) == 1 and (drv or root):
|
||||
tail = self._tail
|
||||
if not tail:
|
||||
return self
|
||||
return self._from_parsed_parts(drv, root, parts[:-1])
|
||||
return self._from_parsed_parts(drv, root, tail[:-1])
|
||||
|
||||
@property
|
||||
def parents(self):
|
||||
|
@ -624,29 +655,29 @@ class PurePath(object):
|
|||
def is_reserved(self):
|
||||
"""Return True if the path contains one of the special names reserved
|
||||
by the system, if any."""
|
||||
if self._flavour is posixpath or not self._parts:
|
||||
if self._flavour is posixpath or not self._tail:
|
||||
return False
|
||||
|
||||
# NOTE: the rules for reserved names seem somewhat complicated
|
||||
# (e.g. r"..\NUL" is reserved but not r"foo\NUL" if "foo" does not
|
||||
# exist). We err on the side of caution and return True for paths
|
||||
# which are not considered reserved by Windows.
|
||||
if self._parts[0].startswith('\\\\'):
|
||||
if self.drive.startswith('\\\\'):
|
||||
# UNC paths are never reserved.
|
||||
return False
|
||||
name = self._parts[-1].partition('.')[0].partition(':')[0].rstrip(' ')
|
||||
name = self._tail[-1].partition('.')[0].partition(':')[0].rstrip(' ')
|
||||
return name.upper() in _WIN_RESERVED_NAMES
|
||||
|
||||
def match(self, path_pattern):
|
||||
"""
|
||||
Return True if this path matches the given pattern.
|
||||
"""
|
||||
path_pattern = self._flavour.normcase(path_pattern)
|
||||
drv, root, pat_parts = self._parse_path(path_pattern)
|
||||
if not pat_parts:
|
||||
pat = type(self)(path_pattern)
|
||||
if not pat.parts:
|
||||
raise ValueError("empty pattern")
|
||||
pat_parts = pat._parts_normcase
|
||||
parts = self._parts_normcase
|
||||
if drv or root:
|
||||
if pat.drive or pat.root:
|
||||
if len(pat_parts) != len(parts):
|
||||
return False
|
||||
elif len(pat_parts) > len(parts):
|
||||
|
@ -707,11 +738,21 @@ class Path(PurePath):
|
|||
cls = WindowsPath if os.name == 'nt' else PosixPath
|
||||
return object.__new__(cls)
|
||||
|
||||
def _make_child_relpath(self, part):
|
||||
# This is an optimization used for dir walking. `part` must be
|
||||
# a single part relative to this path.
|
||||
parts = self._parts + [part]
|
||||
return self._from_parsed_parts(self.drive, self.root, parts)
|
||||
def _make_child_relpath(self, name):
|
||||
path_str = str(self)
|
||||
tail = self._tail
|
||||
if tail:
|
||||
path_str = f'{path_str}{self._flavour.sep}{name}'
|
||||
elif path_str != '.':
|
||||
path_str = f'{path_str}{name}'
|
||||
else:
|
||||
path_str = name
|
||||
path = type(self)(path_str)
|
||||
path._str = path_str
|
||||
path._drv = self.drive
|
||||
path._root = self.root
|
||||
path._tail_cached = tail + [name]
|
||||
return path
|
||||
|
||||
def __enter__(self):
|
||||
# In previous versions of pathlib, __exit__() marked this path as
|
||||
|
@ -1196,12 +1237,12 @@ class Path(PurePath):
|
|||
(as returned by os.path.expanduser)
|
||||
"""
|
||||
if (not (self.drive or self.root) and
|
||||
self._parts and self._parts[0][:1] == '~'):
|
||||
homedir = self._flavour.expanduser(self._parts[0])
|
||||
self._tail and self._tail[0][:1] == '~'):
|
||||
homedir = self._flavour.expanduser(self._tail[0])
|
||||
if homedir[:1] == "~":
|
||||
raise RuntimeError("Could not determine home directory.")
|
||||
drv, root, parts = self._parse_path(homedir)
|
||||
return self._from_parsed_parts(drv, root, parts + self._parts[1:])
|
||||
drv, root, tail = self._parse_path(homedir)
|
||||
return self._from_parsed_parts(drv, root, tail + self._tail[1:])
|
||||
|
||||
return self
|
||||
|
||||
|
|
|
@ -346,8 +346,6 @@ class _BasePurePathTest(object):
|
|||
p = P('a/b')
|
||||
parts = p.parts
|
||||
self.assertEqual(parts, ('a', 'b'))
|
||||
# The object gets reused.
|
||||
self.assertIs(parts, p.parts)
|
||||
# When the path is absolute, the anchor is a separate part.
|
||||
p = P('/a/b')
|
||||
parts = p.parts
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
Speed up :class:`pathlib.Path` construction by omitting the path anchor from
|
||||
the internal list of path parts.
|
Loading…
Reference in New Issue