Add `pathlib._abc.PathModuleBase` (#113893)

Path modules provide a subset of the `os.path` API, specifically those
functions needed to provide `PurePathBase` functionality. Each
`PurePathBase` subclass references its path module via a `pathmod` class
attribute.

This commit adds a new `PathModuleBase` class, which provides abstract
methods that unconditionally raise `UnsupportedOperation`. An instance of
this class is assigned to `PurePathBase.pathmod`, replacing `posixpath`.
As a result, `PurePathBase` is no longer POSIX-y by default, and every
method that relies on `pathmod` raises `UnsupportedOperation`.

Users who subclass `PurePathBase` or `PathBase` should choose the path
syntax by setting `pathmod` to `posixpath`, `ntpath`, `os.path`, or their
own subclass of `PathModuleBase`, as circumstances demand.
This commit is contained in:
Barney Gale 2024-01-14 21:49:53 +00:00 committed by GitHub
parent c2808431b3
commit ca6cf56330
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 182 additions and 59 deletions

View File

@ -33,6 +33,15 @@ __all__ = [
]
# Names that Windows reserves for devices. For the naming rules see
# https://learn.microsoft.com/en-gb/windows/win32/fileio/naming-a-file .
# The COM and LPT families take the digits 1-9 as well as the superscript
# digits one, two and three (\xb9, \xb2, \xb3).
_WIN_RESERVED_NAMES = frozenset().union(
    ('CON', 'PRN', 'AUX', 'NUL', 'CONIN$', 'CONOUT$'),
    (f'COM{c}' for c in '123456789\xb9\xb2\xb3'),
    (f'LPT{c}' for c in '123456789\xb9\xb2\xb3'),
)
class _PathParents(Sequence):
"""This object provides sequence-like access to the logical ancestors
of a path. Don't try to construct it yourself."""
@ -76,6 +85,10 @@ class PurePath(_abc.PurePathBase):
"""
__slots__ = (
# The `_raw_paths` slot stores unnormalized string paths. This is set
# in the `__init__()` method.
'_raw_paths',
# The `_drv`, `_root` and `_tail_cached` slots store parsed and
# normalized parts of the path. They are set when any of the `drive`,
# `root` or `_tail` properties are accessed for the first time. The
@ -141,6 +154,26 @@ class PurePath(_abc.PurePathBase):
# Avoid calling super().__init__, as an optimisation
self._raw_paths = paths
def joinpath(self, *pathsegments):
    """Build a new path from this path followed by *pathsegments*.

    The result is a subpath of this one when every argument is a
    relative path, and a totally different path when one of the
    arguments is anchored.
    """
    # This path always comes first; the extra segments follow in order.
    segments = (self, *pathsegments)
    return self.with_segments(*segments)
def __truediv__(self, key):
    """Implement ``self / key`` by placing *key* after this path.

    Returns NotImplemented if `with_segments()` raises TypeError.
    """
    try:
        joined = self.with_segments(self, key)
    except TypeError:
        return NotImplemented
    return joined
def __rtruediv__(self, key):
    """Implement ``key / self`` by placing *key* before this path.

    Returns NotImplemented if `with_segments()` raises TypeError.
    """
    try:
        joined = self.with_segments(key, self)
    except TypeError:
        return NotImplemented
    return joined
def __reduce__(self):
# Using the parts tuple helps share interned path parts
# when pickling related paths.
@ -386,6 +419,33 @@ class PurePath(_abc.PurePathBase):
other = self.with_segments(other)
return other == self or other in self.parents
def is_absolute(self):
    """Return whether the path is absolute, i.e. has a root and, where
    the path syntax uses them, a drive as well."""
    if self.pathmod is not posixpath:
        # Any other path syntax: delegate to the path module.
        return self.pathmod.isabs(self)
    # Optimization: on POSIX the raw, unjoined segments are inspected
    # directly; one segment starting with a slash is enough.
    return any(path.startswith('/') for path in self._raw_paths)
def is_reserved(self):
    """Return True when the final component of the path is one of the
    names reserved by the system; False otherwise, including for every
    path whose path module is not ntpath."""
    if self.pathmod is not ntpath:
        return False
    if not self.name:
        return False
    # NOTE: the actual rules for reserved names appear to be fairly
    # involved (for instance r"..\NUL" is reserved, yet r"foo\NUL" is not
    # when "foo" does not exist). To stay on the safe side, True is
    # returned even for some paths Windows would not treat as reserved.
    if self.drive.startswith('\\\\'):
        # A UNC path is never reserved.
        return False
    # Reduce the name to the text before the first '.', then before the
    # first ':', and drop trailing spaces before the lookup.
    before_dot, _, _ = self.name.partition('.')
    before_colon, _, _ = before_dot.partition(':')
    candidate = before_colon.rstrip(' ')
    return candidate.upper() in _WIN_RESERVED_NAMES
def as_uri(self):
"""Return the path as a URI."""
if not self.is_absolute():

View File

@ -12,7 +12,6 @@ resemble pathlib's PurePath and Path respectively.
"""
import functools
import posixpath
from errno import ENOENT, ENOTDIR, EBADF, ELOOP, EINVAL
from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO
@ -20,14 +19,6 @@ from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO
# Internals
#
# Reference for Windows paths can be found at
# https://learn.microsoft.com/en-gb/windows/win32/fileio/naming-a-file .
_WIN_RESERVED_NAMES = frozenset(
{'CON', 'PRN', 'AUX', 'NUL', 'CONIN$', 'CONOUT$'} |
{f'COM{c}' for c in '123456789\xb9\xb2\xb3'} |
{f'LPT{c}' for c in '123456789\xb9\xb2\xb3'}
)
_WINERROR_NOT_READY = 21 # drive exists but is not accessible
_WINERROR_INVALID_NAME = 123 # fix for bpo-35306
_WINERROR_CANT_RESOLVE_FILENAME = 1921 # broken symlink pointing to itself
@ -144,6 +135,53 @@ class UnsupportedOperation(NotImplementedError):
pass
class PathModuleBase:
    """Abstract foundation for path modules, the objects that carry out
    low-level path manipulation.

    A path module offers a subset of the os.path API: only the functions
    that PurePathBase needs. A PurePathBase subclass points at its path
    module through the 'pathmod' class attribute.

    Nothing is implemented here; every method raises UnsupportedOperation.
    """

    @classmethod
    def _unsupported(cls, attr):
        # Report the name of the class the call was made through.
        message = f"{cls.__name__}.{attr} is unsupported"
        raise UnsupportedOperation(message)

    @property
    def sep(self):
        """The character that separates path components."""
        self._unsupported('sep')

    def join(self, path, *paths):
        """Join path segments together."""
        self._unsupported('join()')

    def split(self, path):
        """Split the path into a (head, tail) pair. *head* is everything
        before the final path separator and *tail* is everything after it.
        Either part may be empty.
        """
        self._unsupported('split()')

    def splitroot(self, path):
        """Split the path into a (drive, root, tail) 3-item tuple. *drive*
        is a device name or mount point, *root* is a string of separators
        following the drive, and *tail* is everything after the root.
        Any part may be empty."""
        self._unsupported('splitroot()')

    def normcase(self, path):
        """Normalize the case of the path."""
        self._unsupported('normcase()')

    def isabs(self, path):
        """Return whether the path is absolute, i.e. unaffected by the
        current directory or drive."""
        self._unsupported('isabs()')
class PurePathBase:
"""Base class for pure path objects.
@ -154,19 +192,19 @@ class PurePathBase:
"""
__slots__ = (
# The `_raw_paths` slot stores unnormalized string paths. This is set
# in the `__init__()` method.
'_raw_paths',
# The `_raw_path` slot stores a joined string path. This is set in the
# `__init__()` method.
'_raw_path',
# The '_resolving' slot stores a boolean indicating whether the path
# is being processed by `PathBase.resolve()`. This prevents duplicate
# work from occurring when `resolve()` calls `stat()` or `readlink()`.
'_resolving',
)
pathmod = posixpath
pathmod = PathModuleBase()
def __init__(self, *paths):
self._raw_paths = paths
def __init__(self, path, *paths):
self._raw_path = self.pathmod.join(path, *paths) if paths else path
self._resolving = False
def with_segments(self, *pathsegments):
@ -176,11 +214,6 @@ class PurePathBase:
"""
return type(self)(*pathsegments)
@property
def _raw_path(self):
"""The joined but unnormalized path."""
return self.pathmod.join(*self._raw_paths)
def __str__(self):
"""Return the string representation of the path, suitable for
passing to system calls."""
@ -194,7 +227,7 @@ class PurePathBase:
@property
def drive(self):
"""The drive prefix (letter or UNC path), if any."""
return self.pathmod.splitdrive(self._raw_path)[0]
return self.pathmod.splitroot(self._raw_path)[0]
@property
def root(self):
@ -210,7 +243,7 @@ class PurePathBase:
@property
def name(self):
"""The final path component, if any."""
return self.pathmod.basename(self._raw_path)
return self.pathmod.split(self._raw_path)[1]
@property
def suffix(self):
@ -251,10 +284,10 @@ class PurePathBase:
def with_name(self, name):
"""Return a new path with the file name changed."""
dirname = self.pathmod.dirname
if dirname(name):
split = self.pathmod.split
if split(name)[0]:
raise ValueError(f"Invalid name {name!r}")
return self.with_segments(dirname(self._raw_path), name)
return self.with_segments(split(self._raw_path)[0], name)
def with_stem(self, stem):
"""Return a new path with the stem changed."""
@ -336,17 +369,17 @@ class PurePathBase:
paths) or a totally different path (if one of the arguments is
anchored).
"""
return self.with_segments(*self._raw_paths, *pathsegments)
return self.with_segments(self._raw_path, *pathsegments)
def __truediv__(self, key):
try:
return self.joinpath(key)
return self.with_segments(self._raw_path, key)
except TypeError:
return NotImplemented
def __rtruediv__(self, key):
try:
return self.with_segments(key, *self._raw_paths)
return self.with_segments(key, self._raw_path)
except TypeError:
return NotImplemented
@ -371,7 +404,7 @@ class PurePathBase:
def parent(self):
"""The logical parent of the path."""
path = self._raw_path
parent = self.pathmod.dirname(path)
parent = self.pathmod.split(path)[0]
if path != parent:
parent = self.with_segments(parent)
parent._resolving = self._resolving
@ -381,43 +414,20 @@ class PurePathBase:
@property
def parents(self):
"""A sequence of this path's logical parents."""
dirname = self.pathmod.dirname
split = self.pathmod.split
path = self._raw_path
parent = dirname(path)
parent = split(path)[0]
parents = []
while path != parent:
parents.append(self.with_segments(parent))
path = parent
parent = dirname(path)
parent = split(path)[0]
return tuple(parents)
def is_absolute(self):
"""True if the path is absolute (has both a root and, if applicable,
a drive)."""
if self.pathmod is posixpath:
# Optimization: work with raw paths on POSIX.
for path in self._raw_paths:
if path.startswith('/'):
return True
return False
else:
return self.pathmod.isabs(self._raw_path)
def is_reserved(self):
"""Return True if the path contains one of the special names reserved
by the system, if any."""
if self.pathmod is posixpath or not self.name:
return False
# NOTE: the rules for reserved names seem somewhat complicated
# (e.g. r"..\NUL" is reserved but not r"foo\NUL" if "foo" does not
# exist). We err on the side of caution and return True for paths
# which are not considered reserved by Windows.
if self.drive.startswith('\\\\'):
# UNC paths are never reserved.
return False
name = self.name.partition('.')[0].partition(':')[0].rstrip(' ')
return name.upper() in _WIN_RESERVED_NAMES
return self.pathmod.isabs(self._raw_path)
def match(self, path_pattern, *, case_sensitive=None):
"""
@ -726,7 +736,7 @@ class PathBase(PurePathBase):
raise ValueError("Unacceptable pattern: {!r}".format(pattern))
pattern_parts = list(path_pattern.parts)
if not self.pathmod.basename(pattern):
if not self.pathmod.split(pattern)[1]:
# GH-65238: pathlib doesn't preserve trailing slash. Add it back.
pattern_parts.append('')

View File

@ -1151,6 +1151,7 @@ class PathTest(test_pathlib_abc.DummyPathTest, PurePathTest):
def test_matches_pathbase_api(self):
our_names = {name for name in dir(self.cls) if name[0] != '_'}
our_names.remove('is_reserved') # only present in PurePath
path_names = {name for name in dir(pathlib._abc.PathBase) if name[0] != '_'}
self.assertEqual(our_names, path_names)
for attr_name in our_names:

View File

@ -5,7 +5,7 @@ import errno
import stat
import unittest
from pathlib._abc import UnsupportedOperation, PurePathBase, PathBase
from pathlib._abc import UnsupportedOperation, PathModuleBase, PurePathBase, PathBase
import posixpath
from test.support.os_helper import TESTFN
@ -17,6 +17,20 @@ class UnsupportedOperationTest(unittest.TestCase):
self.assertTrue(isinstance(UnsupportedOperation(), NotImplementedError))
class PathModuleBaseTest(unittest.TestCase):
    """Checks that every member of PathModuleBase is unsupported."""

    cls = PathModuleBase

    def test_unsupported_operation(self):
        module = self.cls()
        # `sep` is a property, so reading it is what raises.
        with self.assertRaises(UnsupportedOperation):
            module.sep
        # The remaining members are methods taking a path argument.
        for method_name in ('join', 'split', 'splitroot', 'normcase', 'isabs'):
            method = getattr(module, method_name)
            self.assertRaises(UnsupportedOperation, method, 'foo')
#
# Tests for the pure classes.
#
@ -25,6 +39,42 @@ class UnsupportedOperationTest(unittest.TestCase):
class PurePathBaseTest(unittest.TestCase):
cls = PurePathBase
def test_unsupported_operation_pure(self):
    """Check that the listed properties, operators and methods of a
    `self.cls` instance raise UnsupportedOperation."""
    path = self.cls('foo')
    unsupported = UnsupportedOperation

    # Properties: reading the attribute is what raises.
    property_names = (
        'drive', 'root', 'anchor', 'parts', 'parent', 'parents',
        'name', 'stem', 'suffix', 'suffixes',
    )
    for property_name in property_names:
        with self.assertRaises(unsupported):
            getattr(path, property_name)

    # The division operator, in both operand orders.
    with self.assertRaises(unsupported):
        path / 'bar'
    with self.assertRaises(unsupported):
        'bar' / path

    # Methods, each paired with the arguments it is called with.
    method_calls = (
        ('joinpath', ('bar',)),
        ('with_name', ('bar',)),
        ('with_stem', ('bar',)),
        ('with_suffix', ('.txt',)),
        ('relative_to', ('',)),
        ('is_relative_to', ('',)),
        ('is_absolute', ()),
        ('match', ('*',)),
    )
    for method_name, args in method_calls:
        self.assertRaises(unsupported, getattr(path, method_name), *args)
def test_magic_methods(self):
P = self.cls
self.assertFalse(hasattr(P, '__fspath__'))
@ -39,11 +89,12 @@ class PurePathBaseTest(unittest.TestCase):
self.assertIs(P.__ge__, object.__ge__)
def test_pathmod(self):
self.assertIs(self.cls.pathmod, posixpath)
self.assertIsInstance(self.cls.pathmod, PathModuleBase)
class DummyPurePath(PurePathBase):
__slots__ = ()
pathmod = posixpath
def __eq__(self, other):
if not isinstance(other, DummyPurePath):
@ -669,6 +720,7 @@ class DummyPath(PathBase):
memory.
"""
__slots__ = ()
pathmod = posixpath
_files = {}
_directories = {}