GH-76846, GH-85281: Call `__new__()` and `__init__()` on pathlib subclasses (GH-102789)

Fix an issue where `__new__()` and `__init__()` were not called on subclasses of `pathlib.PurePath` and `Path` in some circumstances.

Paths are now normalized on demand (the first time the drive, root, or parts are needed) rather than at construction time. This speeds up path construction, `p.joinpath(q)`, and `p / q`.

Co-authored-by: Steve Dower <steve.dower@microsoft.com>
This commit is contained in:
Barney Gale 2023-04-03 19:57:11 +01:00 committed by GitHub
parent 2a721258a1
commit 11c302055a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 107 additions and 68 deletions

View File

@ -16,7 +16,6 @@ import sys
import warnings
from _collections_abc import Sequence
from errno import ENOENT, ENOTDIR, EBADF, ELOOP
from operator import attrgetter
from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO
from urllib.parse import quote_from_bytes as urlquote_from_bytes
@ -216,8 +215,8 @@ class _PathParents(Sequence):
def __init__(self, path):
# We don't store the instance to avoid reference cycles
self._pathcls = type(path)
self._drv = path._drv
self._root = path._root
self._drv = path.drive
self._root = path.root
self._parts = path._parts
def __len__(self):
@ -251,12 +250,12 @@ class PurePath(object):
directly, regardless of your system.
"""
__slots__ = (
'_drv', '_root', '_parts',
'_raw_path', '_drv', '_root', '_parts_cached',
'_str', '_hash', '_parts_tuple', '_parts_normcase_cached',
)
_flavour = os.path
def __new__(cls, *args):
def __new__(cls, *args, **kwargs):
"""Construct a PurePath from one or several strings and or existing
PurePath objects. The strings and path objects are combined so as
to yield a canonicalized path, which is incorporated into the
@ -264,23 +263,20 @@ class PurePath(object):
"""
if cls is PurePath:
cls = PureWindowsPath if os.name == 'nt' else PurePosixPath
return cls._from_parts(args)
return object.__new__(cls)
def __reduce__(self):
# Using the parts tuple helps share interned path parts
# when pickling related paths.
return (self.__class__, tuple(self._parts))
return (self.__class__, self.parts)
@classmethod
def _parse_parts(cls, parts):
if not parts:
return '', '', []
elif len(parts) == 1:
path = os.fspath(parts[0])
def __init__(self, *args):
if not args:
path = ''
elif len(args) == 1:
path = os.fspath(args[0])
else:
path = cls._flavour.join(*parts)
sep = cls._flavour.sep
altsep = cls._flavour.altsep
path = self._flavour.join(*args)
if isinstance(path, str):
# Force-cast str subclasses to str (issue #21127)
path = str(path)
@ -289,6 +285,14 @@ class PurePath(object):
"argument should be a str or an os.PathLike "
"object where __fspath__ returns a str, "
f"not {type(path).__name__!r}")
self._raw_path = path
@classmethod
def _parse_path(cls, path):
if not path:
return '', '', []
sep = cls._flavour.sep
altsep = cls._flavour.altsep
if altsep:
path = path.replace(altsep, sep)
drv, root, rel = cls._flavour.splitroot(path)
@ -299,21 +303,20 @@ class PurePath(object):
parsed = [sys.intern(x) for x in unfiltered_parsed if x and x != '.']
return drv, root, parsed
@classmethod
def _from_parts(cls, args):
self = object.__new__(cls)
drv, root, parts = self._parse_parts(args)
def _load_parts(self):
drv, root, parts = self._parse_path(self._raw_path)
self._drv = drv
self._root = root
self._parts = parts
return self
self._parts_cached = parts
@classmethod
def _from_parsed_parts(cls, drv, root, parts):
self = object.__new__(cls)
path = cls._format_parsed_parts(drv, root, parts)
self = cls(path)
self._str = path or '.'
self._drv = drv
self._root = root
self._parts = parts
self._parts_cached = parts
return self
@classmethod
@ -330,7 +333,7 @@ class PurePath(object):
try:
return self._str
except AttributeError:
self._str = self._format_parsed_parts(self._drv, self._root,
self._str = self._format_parsed_parts(self.drive, self.root,
self._parts) or '.'
return self._str
@ -356,7 +359,7 @@ class PurePath(object):
if not self.is_absolute():
raise ValueError("relative path can't be expressed as a file URI")
drive = self._drv
drive = self.drive
if len(drive) == 2 and drive[1] == ':':
# It's a path on a local drive => 'file:///c:/a/b'
prefix = 'file:///' + drive
@ -412,23 +415,43 @@ class PurePath(object):
return NotImplemented
return self._parts_normcase >= other._parts_normcase
drive = property(attrgetter('_drv'),
doc="""The drive prefix (letter or UNC path), if any.""")
@property
def drive(self):
"""The drive prefix (letter or UNC path), if any."""
try:
return self._drv
except AttributeError:
self._load_parts()
return self._drv
root = property(attrgetter('_root'),
doc="""The root of the path, if any.""")
@property
def root(self):
"""The root of the path, if any."""
try:
return self._root
except AttributeError:
self._load_parts()
return self._root
@property
def _parts(self):
try:
return self._parts_cached
except AttributeError:
self._load_parts()
return self._parts_cached
@property
def anchor(self):
"""The concatenation of the drive and root, or ''."""
anchor = self._drv + self._root
anchor = self.drive + self.root
return anchor
@property
def name(self):
"""The final path component, if any."""
parts = self._parts
if len(parts) == (1 if (self._drv or self._root) else 0):
if len(parts) == (1 if (self.drive or self.root) else 0):
return ''
return parts[-1]
@ -477,7 +500,7 @@ class PurePath(object):
drv, root, tail = f.splitroot(name)
if drv or root or not tail or f.sep in tail or (f.altsep and f.altsep in tail):
raise ValueError("Invalid name %r" % (name))
return self._from_parsed_parts(self._drv, self._root,
return self._from_parsed_parts(self.drive, self.root,
self._parts[:-1] + [name])
def with_stem(self, stem):
@ -502,7 +525,7 @@ class PurePath(object):
name = name + suffix
else:
name = name[:-len(old_suffix)] + suffix
return self._from_parsed_parts(self._drv, self._root,
return self._from_parsed_parts(self.drive, self.root,
self._parts[:-1] + [name])
def relative_to(self, other, /, *_deprecated, walk_up=False):
@ -561,22 +584,7 @@ class PurePath(object):
paths) or a totally different path (if one of the arguments is
anchored).
"""
drv1, root1, parts1 = self._drv, self._root, self._parts
drv2, root2, parts2 = self._parse_parts(args)
if root2:
if not drv2 and drv1:
return self._from_parsed_parts(drv1, root2, [drv1 + root2] + parts2[1:])
else:
return self._from_parsed_parts(drv2, root2, parts2)
elif drv2:
if drv2 == drv1 or self._flavour.normcase(drv2) == self._flavour.normcase(drv1):
# Same drive => second path is relative to the first.
return self._from_parsed_parts(drv1, root1, parts1 + parts2[1:])
else:
return self._from_parsed_parts(drv2, root2, parts2)
else:
# Second path is non-anchored (common case).
return self._from_parsed_parts(drv1, root1, parts1 + parts2)
return self.__class__(self._raw_path, *args)
def __truediv__(self, key):
try:
@ -586,15 +594,15 @@ class PurePath(object):
def __rtruediv__(self, key):
try:
return self._from_parts([key] + self._parts)
return type(self)(key, self._raw_path)
except TypeError:
return NotImplemented
@property
def parent(self):
"""The logical parent of the path."""
drv = self._drv
root = self._root
drv = self.drive
root = self.root
parts = self._parts
if len(parts) == 1 and (drv or root):
return self
@ -610,7 +618,7 @@ class PurePath(object):
a drive)."""
# ntpath.isabs() is defective - see GH-44626 .
if self._flavour is ntpath:
return bool(self._drv and self._root)
return bool(self.drive and self.root)
return self._flavour.isabs(self)
def is_reserved(self):
@ -634,7 +642,7 @@ class PurePath(object):
Return True if this path matches the given pattern.
"""
path_pattern = self._flavour.normcase(path_pattern)
drv, root, pat_parts = self._parse_parts((path_pattern,))
drv, root, pat_parts = self._parse_path(path_pattern)
if not pat_parts:
raise ValueError("empty pattern")
parts = self._parts_normcase
@ -687,20 +695,23 @@ class Path(PurePath):
"""
__slots__ = ()
def __new__(cls, *args, **kwargs):
def __init__(self, *args, **kwargs):
if kwargs:
msg = ("support for supplying keyword arguments to pathlib.PurePath "
"is deprecated and scheduled for removal in Python {remove}")
warnings._deprecated("pathlib.PurePath(**kwargs)", msg, remove=(3, 14))
super().__init__(*args)
def __new__(cls, *args, **kwargs):
if cls is Path:
cls = WindowsPath if os.name == 'nt' else PosixPath
return cls._from_parts(args)
return object.__new__(cls)
def _make_child_relpath(self, part):
# This is an optimization used for dir walking. `part` must be
# a single part relative to this path.
parts = self._parts + [part]
return self._from_parsed_parts(self._drv, self._root, parts)
return self._from_parsed_parts(self.drive, self.root, parts)
def __enter__(self):
# In previous versions of pathlib, __exit__() marked this path as
@ -770,7 +781,7 @@ class Path(PurePath):
sys.audit("pathlib.Path.glob", self, pattern)
if not pattern:
raise ValueError("Unacceptable pattern: {!r}".format(pattern))
drv, root, pattern_parts = self._parse_parts((pattern,))
drv, root, pattern_parts = self._parse_path(pattern)
if drv or root:
raise NotImplementedError("Non-relative patterns are unsupported")
if pattern[-1] in (self._flavour.sep, self._flavour.altsep):
@ -785,7 +796,7 @@ class Path(PurePath):
this subtree.
"""
sys.audit("pathlib.Path.rglob", self, pattern)
drv, root, pattern_parts = self._parse_parts((pattern,))
drv, root, pattern_parts = self._parse_path(pattern)
if drv or root:
raise NotImplementedError("Non-relative patterns are unsupported")
if pattern and pattern[-1] in (self._flavour.sep, self._flavour.altsep):
@ -802,12 +813,12 @@ class Path(PurePath):
"""
if self.is_absolute():
return self
elif self._drv:
elif self.drive:
# There is a CWD on each drive-letter drive.
cwd = self._flavour.abspath(self._drv)
cwd = self._flavour.abspath(self.drive)
else:
cwd = os.getcwd()
return self._from_parts([cwd] + self._parts)
return type(self)(cwd, self._raw_path)
def resolve(self, strict=False):
"""
@ -825,7 +836,7 @@ class Path(PurePath):
except OSError as e:
check_eloop(e)
raise
p = self._from_parts((s,))
p = type(self)(s)
# In non-strict mode, realpath() doesn't raise on symlink loops.
# Ensure we get an exception by calling stat()
@ -915,7 +926,7 @@ class Path(PurePath):
"""
if not hasattr(os, "readlink"):
raise NotImplementedError("os.readlink() not available on this system")
return self._from_parts((os.readlink(self),))
return type(self)(os.readlink(self))
def touch(self, mode=0o666, exist_ok=True):
"""
@ -1184,12 +1195,12 @@ class Path(PurePath):
""" Return a new path with expanded ~ and ~user constructs
(as returned by os.path.expanduser)
"""
if (not (self._drv or self._root) and
if (not (self.drive or self.root) and
self._parts and self._parts[0][:1] == '~'):
homedir = self._flavour.expanduser(self._parts[0])
if homedir[:1] == "~":
raise RuntimeError("Could not determine home directory.")
drv, root, parts = self._parse_parts((homedir,))
drv, root, parts = self._parse_path(homedir)
return self._from_parsed_parts(drv, root, parts + self._parts[1:])
return self

View File

@ -27,7 +27,9 @@ except ImportError:
class _BaseFlavourTest(object):
def _check_parse_parts(self, arg, expected):
f = self.cls._parse_parts
def f(parts):
path = self.cls(*parts)._raw_path
return self.cls._parse_path(path)
sep = self.flavour.sep
altsep = self.flavour.altsep
actual = f([x.replace('/', sep) for x in arg])
@ -136,6 +138,14 @@ class NTFlavourTest(_BaseFlavourTest, unittest.TestCase):
# Tests for the pure classes.
#
class _BasePurePathSubclass(object):
init_called = False
def __init__(self, *args):
super().__init__(*args)
self.init_called = True
class _BasePurePathTest(object):
# Keys are canonical paths, values are list of tuples of arguments
@ -221,6 +231,21 @@ class _BasePurePathTest(object):
self._check_str_subclass('a/b.txt')
self._check_str_subclass('/a/b.txt')
def test_init_called_common(self):
class P(_BasePurePathSubclass, self.cls):
pass
p = P('foo', 'bar')
self.assertTrue((p / 'foo').init_called)
self.assertTrue(('foo' / p).init_called)
self.assertTrue(p.joinpath('foo').init_called)
self.assertTrue(p.with_name('foo').init_called)
self.assertTrue(p.with_stem('foo').init_called)
self.assertTrue(p.with_suffix('.foo').init_called)
self.assertTrue(p.relative_to('foo').init_called)
self.assertTrue(p.parent.init_called)
for parent in p.parents:
self.assertTrue(parent.init_called)
def test_join_common(self):
P = self.cls
p = P('a/b')

View File

@ -0,0 +1,3 @@
Fix issue where ``__new__()`` and ``__init__()`` methods of
:class:`pathlib.PurePath` and :class:`~pathlib.Path` subclasses were not
called in some circumstances.