mirror of https://github.com/python/cpython
GH-77609: Add follow_symlinks argument to `pathlib.Path.glob()` (GH-102616)
Add a keyword-only *follow_symlinks* parameter to `pathlib.Path.glob()` and `rglob()`. When *follow_symlinks* is `None` (the default), these methods follow symlinks except when evaluating "`**`" wildcards. When set to true or false, symlinks are always or never followed, respectively.
This commit is contained in:
parent
1668b41dc4
commit
ace676e2c2
|
@ -885,7 +885,7 @@ call fails (for example because the path doesn't exist).
|
|||
.. versionadded:: 3.5
|
||||
|
||||
|
||||
.. method:: Path.glob(pattern, *, case_sensitive=None)
|
||||
.. method:: Path.glob(pattern, *, case_sensitive=None, follow_symlinks=None)
|
||||
|
||||
Glob the given relative *pattern* in the directory represented by this path,
|
||||
yielding all matching files (of any kind)::
|
||||
|
@ -911,6 +911,11 @@ call fails (for example because the path doesn't exist).
|
|||
typically, case-sensitive on POSIX, and case-insensitive on Windows.
|
||||
Set *case_sensitive* to ``True`` or ``False`` to override this behaviour.
|
||||
|
||||
By default, or when the *follow_symlinks* keyword-only argument is set to
|
||||
``None``, this method follows symlinks except when expanding "``**``"
|
||||
wildcards. Set *follow_symlinks* to ``True`` to always follow symlinks, or
|
||||
``False`` to treat all symlinks as files.
|
||||
|
||||
.. note::
|
||||
Using the "``**``" pattern in large directory trees may consume
|
||||
an inordinate amount of time.
|
||||
|
@ -924,6 +929,9 @@ call fails (for example because the path doesn't exist).
|
|||
.. versionadded:: 3.12
|
||||
The *case_sensitive* argument.
|
||||
|
||||
.. versionadded:: 3.13
|
||||
The *follow_symlinks* argument.
|
||||
|
||||
.. method:: Path.group()
|
||||
|
||||
Return the name of the group owning the file. :exc:`KeyError` is raised
|
||||
|
@ -1309,7 +1317,7 @@ call fails (for example because the path doesn't exist).
|
|||
.. versionadded:: 3.6
|
||||
The *strict* argument (pre-3.6 behavior is strict).
|
||||
|
||||
.. method:: Path.rglob(pattern, *, case_sensitive=None)
|
||||
.. method:: Path.rglob(pattern, *, case_sensitive=None, follow_symlinks=None)
|
||||
|
||||
Glob the given relative *pattern* recursively. This is like calling
|
||||
:meth:`Path.glob` with "``**/``" added in front of the *pattern*, where
|
||||
|
@ -1327,6 +1335,11 @@ call fails (for example because the path doesn't exist).
|
|||
typically, case-sensitive on POSIX, and case-insensitive on Windows.
|
||||
Set *case_sensitive* to ``True`` or ``False`` to override this behaviour.
|
||||
|
||||
By default, or when the *follow_symlinks* keyword-only argument is set to
|
||||
``None``, this method follows symlinks except when expanding "``**``"
|
||||
wildcards. Set *follow_symlinks* to ``True`` to always follow symlinks, or
|
||||
``False`` to treat all symlinks as files.
|
||||
|
||||
.. audit-event:: pathlib.Path.rglob self,pattern pathlib.Path.rglob
|
||||
|
||||
.. versionchanged:: 3.11
|
||||
|
@ -1336,6 +1349,9 @@ call fails (for example because the path doesn't exist).
|
|||
.. versionadded:: 3.12
|
||||
The *case_sensitive* argument.
|
||||
|
||||
.. versionadded:: 3.13
|
||||
The *follow_symlinks* argument.
|
||||
|
||||
.. method:: Path.rmdir()
|
||||
|
||||
Remove this directory. The directory must be empty.
|
||||
|
|
|
@ -87,6 +87,12 @@ New Modules
|
|||
Improved Modules
|
||||
================
|
||||
|
||||
pathlib
|
||||
-------
|
||||
|
||||
* Add *follow_symlinks* keyword-only argument to :meth:`pathlib.Path.glob` and
|
||||
:meth:`~pathlib.Path.rglob`.
|
||||
(Contributed by Barney Gale in :gh:`77609`.)
|
||||
|
||||
Optimizations
|
||||
=============
|
||||
|
|
|
@ -105,19 +105,19 @@ class _Selector:
|
|||
self.successor = _TerminatingSelector()
|
||||
self.dironly = False
|
||||
|
||||
def select_from(self, parent_path):
|
||||
def select_from(self, parent_path, follow_symlinks):
|
||||
"""Iterate over all child paths of `parent_path` matched by this
|
||||
selector. This can contain parent_path itself."""
|
||||
path_cls = type(parent_path)
|
||||
scandir = path_cls._scandir
|
||||
if not parent_path.is_dir():
|
||||
return iter([])
|
||||
return self._select_from(parent_path, scandir)
|
||||
return self._select_from(parent_path, scandir, follow_symlinks)
|
||||
|
||||
|
||||
class _TerminatingSelector:
|
||||
|
||||
def _select_from(self, parent_path, scandir):
|
||||
def _select_from(self, parent_path, scandir, follow_symlinks):
|
||||
yield parent_path
|
||||
|
||||
|
||||
|
@ -126,9 +126,9 @@ class _ParentSelector(_Selector):
|
|||
def __init__(self, name, child_parts, flavour, case_sensitive):
|
||||
_Selector.__init__(self, child_parts, flavour, case_sensitive)
|
||||
|
||||
def _select_from(self, parent_path, scandir):
|
||||
def _select_from(self, parent_path, scandir, follow_symlinks):
|
||||
path = parent_path._make_child_relpath('..')
|
||||
for p in self.successor._select_from(path, scandir):
|
||||
for p in self.successor._select_from(path, scandir, follow_symlinks):
|
||||
yield p
|
||||
|
||||
|
||||
|
@ -141,7 +141,8 @@ class _WildcardSelector(_Selector):
|
|||
case_sensitive = _is_case_sensitive(flavour)
|
||||
self.match = _compile_pattern(pat, case_sensitive)
|
||||
|
||||
def _select_from(self, parent_path, scandir):
|
||||
def _select_from(self, parent_path, scandir, follow_symlinks):
|
||||
follow_dirlinks = True if follow_symlinks is None else follow_symlinks
|
||||
try:
|
||||
# We must close the scandir() object before proceeding to
|
||||
# avoid exhausting file descriptors when globbing deep trees.
|
||||
|
@ -153,14 +154,14 @@ class _WildcardSelector(_Selector):
|
|||
for entry in entries:
|
||||
if self.dironly:
|
||||
try:
|
||||
if not entry.is_dir():
|
||||
if not entry.is_dir(follow_symlinks=follow_dirlinks):
|
||||
continue
|
||||
except OSError:
|
||||
continue
|
||||
name = entry.name
|
||||
if self.match(name):
|
||||
path = parent_path._make_child_relpath(name)
|
||||
for p in self.successor._select_from(path, scandir):
|
||||
for p in self.successor._select_from(path, scandir, follow_symlinks):
|
||||
yield p
|
||||
|
||||
|
||||
|
@ -169,16 +170,17 @@ class _RecursiveWildcardSelector(_Selector):
|
|||
def __init__(self, pat, child_parts, flavour, case_sensitive):
|
||||
_Selector.__init__(self, child_parts, flavour, case_sensitive)
|
||||
|
||||
def _iterate_directories(self, parent_path):
|
||||
def _iterate_directories(self, parent_path, follow_symlinks):
|
||||
yield parent_path
|
||||
for dirpath, dirnames, _ in parent_path.walk():
|
||||
for dirpath, dirnames, _ in parent_path.walk(follow_symlinks=follow_symlinks):
|
||||
for dirname in dirnames:
|
||||
yield dirpath._make_child_relpath(dirname)
|
||||
|
||||
def _select_from(self, parent_path, scandir):
|
||||
def _select_from(self, parent_path, scandir, follow_symlinks):
|
||||
follow_dirlinks = False if follow_symlinks is None else follow_symlinks
|
||||
successor_select = self.successor._select_from
|
||||
for starting_point in self._iterate_directories(parent_path):
|
||||
for p in successor_select(starting_point, scandir):
|
||||
for starting_point in self._iterate_directories(parent_path, follow_dirlinks):
|
||||
for p in successor_select(starting_point, scandir, follow_symlinks):
|
||||
yield p
|
||||
|
||||
|
||||
|
@ -189,10 +191,10 @@ class _DoubleRecursiveWildcardSelector(_RecursiveWildcardSelector):
|
|||
multiple non-adjacent '**' segments.
|
||||
"""
|
||||
|
||||
def _select_from(self, parent_path, scandir):
|
||||
def _select_from(self, parent_path, scandir, follow_symlinks):
|
||||
yielded = set()
|
||||
try:
|
||||
for p in super()._select_from(parent_path, scandir):
|
||||
for p in super()._select_from(parent_path, scandir, follow_symlinks):
|
||||
if p not in yielded:
|
||||
yield p
|
||||
yielded.add(p)
|
||||
|
@ -994,7 +996,7 @@ class Path(PurePath):
|
|||
path._tail_cached = tail + [name]
|
||||
return path
|
||||
|
||||
def glob(self, pattern, *, case_sensitive=None):
|
||||
def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
|
||||
"""Iterate over this subtree and yield all existing files (of any
|
||||
kind, including directories) matching the given relative pattern.
|
||||
"""
|
||||
|
@ -1007,10 +1009,10 @@ class Path(PurePath):
|
|||
if pattern[-1] in (self._flavour.sep, self._flavour.altsep):
|
||||
pattern_parts.append('')
|
||||
selector = _make_selector(tuple(pattern_parts), self._flavour, case_sensitive)
|
||||
for p in selector.select_from(self):
|
||||
for p in selector.select_from(self, follow_symlinks):
|
||||
yield p
|
||||
|
||||
def rglob(self, pattern, *, case_sensitive=None):
|
||||
def rglob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
|
||||
"""Recursively yield all existing files (of any kind, including
|
||||
directories) matching the given relative pattern, anywhere in
|
||||
this subtree.
|
||||
|
@ -1022,7 +1024,7 @@ class Path(PurePath):
|
|||
if pattern and pattern[-1] in (self._flavour.sep, self._flavour.altsep):
|
||||
pattern_parts.append('')
|
||||
selector = _make_selector(("**",) + tuple(pattern_parts), self._flavour, case_sensitive)
|
||||
for p in selector.select_from(self):
|
||||
for p in selector.select_from(self, follow_symlinks):
|
||||
yield p
|
||||
|
||||
def walk(self, top_down=True, on_error=None, follow_symlinks=False):
|
||||
|
|
|
@ -1863,6 +1863,35 @@ class _BasePathTest(object):
|
|||
_check(path, "dirb/file*", True, [])
|
||||
_check(path, "dirb/file*", False, ["dirB/fileB"])
|
||||
|
||||
@os_helper.skip_unless_symlink
|
||||
def test_glob_follow_symlinks_common(self):
|
||||
def _check(path, glob, expected):
|
||||
actual = {path for path in path.glob(glob, follow_symlinks=True)
|
||||
if "linkD" not in path.parent.parts} # exclude symlink loop.
|
||||
self.assertEqual(actual, { P(BASE, q) for q in expected })
|
||||
P = self.cls
|
||||
p = P(BASE)
|
||||
_check(p, "fileB", [])
|
||||
_check(p, "dir*/file*", ["dirB/fileB", "dirC/fileC"])
|
||||
_check(p, "*A", ["dirA", "fileA", "linkA"])
|
||||
_check(p, "*B/*", ["dirB/fileB", "dirB/linkD", "linkB/fileB", "linkB/linkD"])
|
||||
_check(p, "*/fileB", ["dirB/fileB", "linkB/fileB"])
|
||||
_check(p, "*/", ["dirA", "dirB", "dirC", "dirE", "linkB"])
|
||||
|
||||
@os_helper.skip_unless_symlink
|
||||
def test_glob_no_follow_symlinks_common(self):
|
||||
def _check(path, glob, expected):
|
||||
actual = {path for path in path.glob(glob, follow_symlinks=False)}
|
||||
self.assertEqual(actual, { P(BASE, q) for q in expected })
|
||||
P = self.cls
|
||||
p = P(BASE)
|
||||
_check(p, "fileB", [])
|
||||
_check(p, "dir*/file*", ["dirB/fileB", "dirC/fileC"])
|
||||
_check(p, "*A", ["dirA", "fileA", "linkA"])
|
||||
_check(p, "*B/*", ["dirB/fileB", "dirB/linkD"])
|
||||
_check(p, "*/fileB", ["dirB/fileB"])
|
||||
_check(p, "*/", ["dirA", "dirB", "dirC", "dirE"])
|
||||
|
||||
def test_rglob_common(self):
|
||||
def _check(glob, expected):
|
||||
self.assertEqual(sorted(glob), sorted(P(BASE, q) for q in expected))
|
||||
|
@ -1906,6 +1935,60 @@ class _BasePathTest(object):
|
|||
_check(p.rglob("*.txt"), ["dirC/novel.txt"])
|
||||
_check(p.rglob("*.*"), ["dirC/novel.txt"])
|
||||
|
||||
@os_helper.skip_unless_symlink
|
||||
def test_rglob_follow_symlinks_common(self):
|
||||
def _check(path, glob, expected):
|
||||
actual = {path for path in path.rglob(glob, follow_symlinks=True)
|
||||
if 'linkD' not in path.parent.parts} # exclude symlink loop.
|
||||
self.assertEqual(actual, { P(BASE, q) for q in expected })
|
||||
P = self.cls
|
||||
p = P(BASE)
|
||||
_check(p, "fileB", ["dirB/fileB", "dirA/linkC/fileB", "linkB/fileB"])
|
||||
_check(p, "*/fileA", [])
|
||||
_check(p, "*/fileB", ["dirB/fileB", "dirA/linkC/fileB", "linkB/fileB"])
|
||||
_check(p, "file*", ["fileA", "dirA/linkC/fileB", "dirB/fileB",
|
||||
"dirC/fileC", "dirC/dirD/fileD", "linkB/fileB"])
|
||||
_check(p, "*/", ["dirA", "dirA/linkC", "dirA/linkC/linkD", "dirB", "dirB/linkD",
|
||||
"dirC", "dirC/dirD", "dirE", "linkB", "linkB/linkD"])
|
||||
_check(p, "", ["", "dirA", "dirA/linkC", "dirA/linkC/linkD", "dirB", "dirB/linkD",
|
||||
"dirC", "dirE", "dirC/dirD", "linkB", "linkB/linkD"])
|
||||
|
||||
p = P(BASE, "dirC")
|
||||
_check(p, "*", ["dirC/fileC", "dirC/novel.txt",
|
||||
"dirC/dirD", "dirC/dirD/fileD"])
|
||||
_check(p, "file*", ["dirC/fileC", "dirC/dirD/fileD"])
|
||||
_check(p, "*/*", ["dirC/dirD/fileD"])
|
||||
_check(p, "*/", ["dirC/dirD"])
|
||||
_check(p, "", ["dirC", "dirC/dirD"])
|
||||
# gh-91616, a re module regression
|
||||
_check(p, "*.txt", ["dirC/novel.txt"])
|
||||
_check(p, "*.*", ["dirC/novel.txt"])
|
||||
|
||||
@os_helper.skip_unless_symlink
|
||||
def test_rglob_no_follow_symlinks_common(self):
|
||||
def _check(path, glob, expected):
|
||||
actual = {path for path in path.rglob(glob, follow_symlinks=False)}
|
||||
self.assertEqual(actual, { P(BASE, q) for q in expected })
|
||||
P = self.cls
|
||||
p = P(BASE)
|
||||
_check(p, "fileB", ["dirB/fileB"])
|
||||
_check(p, "*/fileA", [])
|
||||
_check(p, "*/fileB", ["dirB/fileB"])
|
||||
_check(p, "file*", ["fileA", "dirB/fileB", "dirC/fileC", "dirC/dirD/fileD", ])
|
||||
_check(p, "*/", ["dirA", "dirB", "dirC", "dirC/dirD", "dirE"])
|
||||
_check(p, "", ["", "dirA", "dirB", "dirC", "dirE", "dirC/dirD"])
|
||||
|
||||
p = P(BASE, "dirC")
|
||||
_check(p, "*", ["dirC/fileC", "dirC/novel.txt",
|
||||
"dirC/dirD", "dirC/dirD/fileD"])
|
||||
_check(p, "file*", ["dirC/fileC", "dirC/dirD/fileD"])
|
||||
_check(p, "*/*", ["dirC/dirD/fileD"])
|
||||
_check(p, "*/", ["dirC/dirD"])
|
||||
_check(p, "", ["dirC", "dirC/dirD"])
|
||||
# gh-91616, a re module regression
|
||||
_check(p, "*.txt", ["dirC/novel.txt"])
|
||||
_check(p, "*.*", ["dirC/novel.txt"])
|
||||
|
||||
@os_helper.skip_unless_symlink
|
||||
def test_rglob_symlink_loop(self):
|
||||
# Don't get fooled by symlink loops (Issue #26012).
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
Add *follow_symlinks* argument to :meth:`pathlib.Path.glob` and
|
||||
:meth:`~pathlib.Path.rglob`, defaulting to ``None`` (symlinks are followed except when expanding "``**``" wildcards).
|
Loading…
Reference in New Issue