mirror of https://github.com/python/cpython
Issue #13734: Add os.fwalk(), a directory walking function yielding file
descriptors.
This commit is contained in:
parent
fb39063202
commit
7372b06cd7
|
@ -2251,6 +2251,57 @@ Files and Directories
|
||||||
os.rmdir(os.path.join(root, name))
|
os.rmdir(os.path.join(root, name))
|
||||||
|
|
||||||
|
|
||||||
|
.. function:: fwalk(top, topdown=True, onerror=None, followlinks=False)
|
||||||
|
|
||||||
|
.. index::
|
||||||
|
single: directory; walking
|
||||||
|
single: directory; traversal
|
||||||
|
|
||||||
|
This behaves exactly like :func:`walk`, except that it yields a 4-tuple
|
||||||
|
``(dirpath, dirnames, filenames, dirfd)``.
|
||||||
|
|
||||||
|
*dirpath*, *dirnames* and *filenames* are identical to :func:`walk` output,
|
||||||
|
and *dirfd* is a file descriptor referring to the directory *dirpath*.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
Since :func:`fwalk` yields file descriptors, those are only valid until
|
||||||
|
the next iteration step, so you should duplicate them (e.g. with
|
||||||
|
:func:`dup`) if you want to keep them longer.
|
||||||
|
|
||||||
|
This example displays the number of bytes taken by non-directory files in each
|
||||||
|
directory under the starting directory, except that it doesn't look under any
|
||||||
|
CVS subdirectory::
|
||||||
|
|
||||||
|
import os
|
||||||
|
for root, dirs, files, rootfd in os.fwalk('python/Lib/email'):
|
||||||
|
print(root, "consumes", end="")
|
||||||
|
print(sum([os.fstatat(rootfd, name).st_size for name in files]),
|
||||||
|
end="")
|
||||||
|
print("bytes in", len(files), "non-directory files")
|
||||||
|
if 'CVS' in dirs:
|
||||||
|
dirs.remove('CVS') # don't visit CVS directories
|
||||||
|
|
||||||
|
In the next example, walking the tree bottom-up is essential:
|
||||||
|
:func:`unlinkat` doesn't allow deleting a directory before the directory is
|
||||||
|
empty::
|
||||||
|
|
||||||
|
# Delete everything reachable from the directory named in "top",
|
||||||
|
# assuming there are no symbolic links.
|
||||||
|
# CAUTION: This is dangerous! For example, if top == '/', it
|
||||||
|
# could delete all your disk files.
|
||||||
|
import os
|
||||||
|
for root, dirs, files, rootfd in os.fwalk(top, topdown=False):
|
||||||
|
for name in files:
|
||||||
|
os.unlinkat(rootfd, name)
|
||||||
|
for name in dirs:
|
||||||
|
os.unlinkat(rootfd, name, os.AT_REMOVEDIR)
|
||||||
|
|
||||||
|
Availability: Unix.
|
||||||
|
|
||||||
|
.. versionadded:: 3.3
|
||||||
|
|
||||||
|
|
||||||
.. _os-process:
|
.. _os-process:
|
||||||
|
|
||||||
Process Management
|
Process Management
|
||||||
|
|
|
@ -497,6 +497,10 @@ os
|
||||||
|
|
||||||
(Patch submitted by Giampaolo Rodolà in :issue:`10784`.)
|
(Patch submitted by Giampaolo Rodolà in :issue:`10784`.)
|
||||||
|
|
||||||
|
* The :mod:`os` module has a new :func:`~os.fwalk` function similar to
|
||||||
|
:func:`~os.walk` except that it also yields file descriptors referring to the
|
||||||
|
directories visited. This is especially useful to avoid symlink races.
|
||||||
|
|
||||||
* "at" functions (:issue:`4761`):
|
* "at" functions (:issue:`4761`):
|
||||||
|
|
||||||
* :func:`~os.faccessat`
|
* :func:`~os.faccessat`
|
||||||
|
|
101
Lib/os.py
101
Lib/os.py
|
@ -24,6 +24,7 @@ and opendir), and leave all pathname manipulation to os.path
|
||||||
#'
|
#'
|
||||||
|
|
||||||
import sys, errno
|
import sys, errno
|
||||||
|
import stat as st
|
||||||
|
|
||||||
_names = sys.builtin_module_names
|
_names = sys.builtin_module_names
|
||||||
|
|
||||||
|
@ -32,6 +33,9 @@ __all__ = ["altsep", "curdir", "pardir", "sep", "pathsep", "linesep",
|
||||||
"defpath", "name", "path", "devnull",
|
"defpath", "name", "path", "devnull",
|
||||||
"SEEK_SET", "SEEK_CUR", "SEEK_END"]
|
"SEEK_SET", "SEEK_CUR", "SEEK_END"]
|
||||||
|
|
||||||
|
def _exists(name):
|
||||||
|
return name in globals()
|
||||||
|
|
||||||
def _get_exports_list(module):
|
def _get_exports_list(module):
|
||||||
try:
|
try:
|
||||||
return list(module.__all__)
|
return list(module.__all__)
|
||||||
|
@ -120,7 +124,12 @@ def _get_masked_mode(mode):
|
||||||
umask(mask)
|
umask(mask)
|
||||||
return mode & ~mask
|
return mode & ~mask
|
||||||
|
|
||||||
#'
|
def _are_same_file(stat1, stat2):
|
||||||
|
"""Helper function that checks whether two stat results refer to the same
|
||||||
|
file.
|
||||||
|
"""
|
||||||
|
return (stat1.st_ino == stat2.st_ino and stat1.st_dev == stat2.st_dev)
|
||||||
|
#
|
||||||
|
|
||||||
# Super directory utilities.
|
# Super directory utilities.
|
||||||
# (Inspired by Eric Raymond; the doc strings are mostly his)
|
# (Inspired by Eric Raymond; the doc strings are mostly his)
|
||||||
|
@ -151,7 +160,6 @@ def makedirs(name, mode=0o777, exist_ok=False):
|
||||||
try:
|
try:
|
||||||
mkdir(name, mode)
|
mkdir(name, mode)
|
||||||
except OSError as e:
|
except OSError as e:
|
||||||
import stat as st
|
|
||||||
if not (e.errno == errno.EEXIST and exist_ok and path.isdir(name) and
|
if not (e.errno == errno.EEXIST and exist_ok and path.isdir(name) and
|
||||||
st.S_IMODE(lstat(name).st_mode) == _get_masked_mode(mode)):
|
st.S_IMODE(lstat(name).st_mode) == _get_masked_mode(mode)):
|
||||||
raise
|
raise
|
||||||
|
@ -298,6 +306,92 @@ def walk(top, topdown=True, onerror=None, followlinks=False):
|
||||||
|
|
||||||
__all__.append("walk")
|
__all__.append("walk")
|
||||||
|
|
||||||
|
if _exists("openat"):
|
||||||
|
|
||||||
|
def fwalk(top, topdown=True, onerror=None, followlinks=False):
|
||||||
|
"""Directory tree generator.
|
||||||
|
|
||||||
|
This behaves exactly like walk(), except that it yields a 4-tuple
|
||||||
|
|
||||||
|
dirpath, dirnames, filenames, dirfd
|
||||||
|
|
||||||
|
`dirpath`, `dirnames` and `filenames` are identical to walk() output,
|
||||||
|
and `dirfd` is a file descriptor referring to the directory `dirpath`.
|
||||||
|
|
||||||
|
The advantage of walkfd() over walk() is that it's safe against symlink
|
||||||
|
races (when followlinks is False).
|
||||||
|
|
||||||
|
Caution:
|
||||||
|
Since fwalk() yields file descriptors, those are only valid until the
|
||||||
|
next iteration step, so you should dup() them if you want to keep them
|
||||||
|
for a longer period.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
|
||||||
|
import os
|
||||||
|
for root, dirs, files, rootfd in os.fwalk('python/Lib/email'):
|
||||||
|
print(root, "consumes", end="")
|
||||||
|
print(sum([os.fstatat(rootfd, name).st_size for name in files]),
|
||||||
|
end="")
|
||||||
|
print("bytes in", len(files), "non-directory files")
|
||||||
|
if 'CVS' in dirs:
|
||||||
|
dirs.remove('CVS') # don't visit CVS directories
|
||||||
|
"""
|
||||||
|
# Note: To guard against symlink races, we use the standard
|
||||||
|
# lstat()/open()/fstat() trick.
|
||||||
|
orig_st = lstat(top)
|
||||||
|
topfd = open(top, O_RDONLY)
|
||||||
|
try:
|
||||||
|
if (followlinks or (st.S_ISDIR(orig_st.st_mode) and
|
||||||
|
_are_same_file(orig_st, fstat(topfd)))):
|
||||||
|
for x in _fwalk(topfd, top, topdown, onerror, followlinks):
|
||||||
|
yield x
|
||||||
|
finally:
|
||||||
|
close(topfd)
|
||||||
|
|
||||||
|
def _fwalk(topfd, toppath, topdown, onerror, followlinks):
|
||||||
|
# Note: This uses O(depth of the directory tree) file descriptors: if
|
||||||
|
# necessary, it can be adapted to only require O(1) FDs, see issue
|
||||||
|
# #13734.
|
||||||
|
|
||||||
|
# whether to follow symlinks
|
||||||
|
flag = 0 if followlinks else AT_SYMLINK_NOFOLLOW
|
||||||
|
|
||||||
|
names = fdlistdir(topfd)
|
||||||
|
dirs, nondirs = [], []
|
||||||
|
for name in names:
|
||||||
|
# Here, we don't use AT_SYMLINK_NOFOLLOW to be consistent with
|
||||||
|
# walk() which reports symlinks to directories as directories. We do
|
||||||
|
# however check for symlinks before recursing into a subdirectory.
|
||||||
|
if st.S_ISDIR(fstatat(topfd, name).st_mode):
|
||||||
|
dirs.append(name)
|
||||||
|
else:
|
||||||
|
nondirs.append(name)
|
||||||
|
|
||||||
|
if topdown:
|
||||||
|
yield toppath, dirs, nondirs, topfd
|
||||||
|
|
||||||
|
for name in dirs:
|
||||||
|
try:
|
||||||
|
orig_st = fstatat(topfd, name, flag)
|
||||||
|
dirfd = openat(topfd, name, O_RDONLY)
|
||||||
|
except error as err:
|
||||||
|
if onerror is not None:
|
||||||
|
onerror(err)
|
||||||
|
return
|
||||||
|
try:
|
||||||
|
if followlinks or _are_same_file(orig_st, fstat(dirfd)):
|
||||||
|
dirpath = path.join(toppath, name)
|
||||||
|
for x in _fwalk(dirfd, dirpath, topdown, onerror, followlinks):
|
||||||
|
yield x
|
||||||
|
finally:
|
||||||
|
close(dirfd)
|
||||||
|
|
||||||
|
if not topdown:
|
||||||
|
yield toppath, dirs, nondirs, topfd
|
||||||
|
|
||||||
|
__all__.append("fwalk")
|
||||||
|
|
||||||
# Make sure os.environ exists, at least
|
# Make sure os.environ exists, at least
|
||||||
try:
|
try:
|
||||||
environ
|
environ
|
||||||
|
@ -598,9 +692,6 @@ def _fscodec():
|
||||||
fsencode, fsdecode = _fscodec()
|
fsencode, fsdecode = _fscodec()
|
||||||
del _fscodec
|
del _fscodec
|
||||||
|
|
||||||
def _exists(name):
|
|
||||||
return name in globals()
|
|
||||||
|
|
||||||
# Supply spawn*() (probably only for Unix)
|
# Supply spawn*() (probably only for Unix)
|
||||||
if _exists("fork") and not _exists("spawnv") and _exists("execv"):
|
if _exists("fork") and not _exists("spawnv") and _exists("execv"):
|
||||||
|
|
||||||
|
|
|
@ -20,6 +20,8 @@ import uuid
|
||||||
import asyncore
|
import asyncore
|
||||||
import asynchat
|
import asynchat
|
||||||
import socket
|
import socket
|
||||||
|
import itertools
|
||||||
|
import stat
|
||||||
try:
|
try:
|
||||||
import threading
|
import threading
|
||||||
except ImportError:
|
except ImportError:
|
||||||
|
@ -159,7 +161,6 @@ class StatAttributeTests(unittest.TestCase):
|
||||||
if not hasattr(os, "stat"):
|
if not hasattr(os, "stat"):
|
||||||
return
|
return
|
||||||
|
|
||||||
import stat
|
|
||||||
result = os.stat(fname)
|
result = os.stat(fname)
|
||||||
|
|
||||||
# Make sure direct access works
|
# Make sure direct access works
|
||||||
|
@ -476,7 +477,7 @@ class EnvironTests(mapping_tests.BasicTestMappingProtocol):
|
||||||
class WalkTests(unittest.TestCase):
|
class WalkTests(unittest.TestCase):
|
||||||
"""Tests for os.walk()."""
|
"""Tests for os.walk()."""
|
||||||
|
|
||||||
def test_traversal(self):
|
def setUp(self):
|
||||||
import os
|
import os
|
||||||
from os.path import join
|
from os.path import join
|
||||||
|
|
||||||
|
@ -586,6 +587,60 @@ class WalkTests(unittest.TestCase):
|
||||||
os.remove(dirname)
|
os.remove(dirname)
|
||||||
os.rmdir(support.TESTFN)
|
os.rmdir(support.TESTFN)
|
||||||
|
|
||||||
|
|
||||||
|
@unittest.skipUnless(hasattr(os, 'fwalk'), "Test needs os.fwalk()")
|
||||||
|
class FwalkTests(WalkTests):
|
||||||
|
"""Tests for os.fwalk()."""
|
||||||
|
|
||||||
|
def test_compare_to_walk(self):
|
||||||
|
# compare with walk() results
|
||||||
|
for topdown, followlinks in itertools.product((True, False), repeat=2):
|
||||||
|
args = support.TESTFN, topdown, None, followlinks
|
||||||
|
expected = {}
|
||||||
|
for root, dirs, files in os.walk(*args):
|
||||||
|
expected[root] = (set(dirs), set(files))
|
||||||
|
|
||||||
|
for root, dirs, files, rootfd in os.fwalk(*args):
|
||||||
|
self.assertIn(root, expected)
|
||||||
|
self.assertEqual(expected[root], (set(dirs), set(files)))
|
||||||
|
|
||||||
|
def test_dir_fd(self):
|
||||||
|
# check returned file descriptors
|
||||||
|
for topdown, followlinks in itertools.product((True, False), repeat=2):
|
||||||
|
args = support.TESTFN, topdown, None, followlinks
|
||||||
|
for root, dirs, files, rootfd in os.fwalk(*args):
|
||||||
|
# check that the FD is valid
|
||||||
|
os.fstat(rootfd)
|
||||||
|
# check that fdlistdir() returns consistent information
|
||||||
|
self.assertEqual(set(os.fdlistdir(rootfd)), set(dirs) | set(files))
|
||||||
|
|
||||||
|
def test_fd_leak(self):
|
||||||
|
# Since we're opening a lot of FDs, we must be careful to avoid leaks:
|
||||||
|
# we both check that calling fwalk() a large number of times doesn't
|
||||||
|
# yield EMFILE, and that the minimum allocated FD hasn't changed.
|
||||||
|
minfd = os.dup(1)
|
||||||
|
os.close(minfd)
|
||||||
|
for i in range(256):
|
||||||
|
for x in os.fwalk(support.TESTFN):
|
||||||
|
pass
|
||||||
|
newfd = os.dup(1)
|
||||||
|
self.addCleanup(os.close, newfd)
|
||||||
|
self.assertEqual(newfd, minfd)
|
||||||
|
|
||||||
|
def tearDown(self):
|
||||||
|
# cleanup
|
||||||
|
for root, dirs, files, rootfd in os.fwalk(support.TESTFN, topdown=False):
|
||||||
|
for name in files:
|
||||||
|
os.unlinkat(rootfd, name)
|
||||||
|
for name in dirs:
|
||||||
|
st = os.fstatat(rootfd, name, os.AT_SYMLINK_NOFOLLOW)
|
||||||
|
if stat.S_ISDIR(st.st_mode):
|
||||||
|
os.unlinkat(rootfd, name, os.AT_REMOVEDIR)
|
||||||
|
else:
|
||||||
|
os.unlinkat(rootfd, name)
|
||||||
|
os.rmdir(support.TESTFN)
|
||||||
|
|
||||||
|
|
||||||
class MakedirTests(unittest.TestCase):
|
class MakedirTests(unittest.TestCase):
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
os.mkdir(support.TESTFN)
|
os.mkdir(support.TESTFN)
|
||||||
|
@ -1700,6 +1755,7 @@ def test_main():
|
||||||
StatAttributeTests,
|
StatAttributeTests,
|
||||||
EnvironTests,
|
EnvironTests,
|
||||||
WalkTests,
|
WalkTests,
|
||||||
|
FwalkTests,
|
||||||
MakedirTests,
|
MakedirTests,
|
||||||
DevNullTests,
|
DevNullTests,
|
||||||
URandomTests,
|
URandomTests,
|
||||||
|
|
|
@ -466,6 +466,9 @@ Core and Builtins
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #13734: Add os.fwalk(), a directory walking function yielding file
|
||||||
|
descriptors.
|
||||||
|
|
||||||
- Issue #2945: Make the distutils upload command aware of bdist_rpm products.
|
- Issue #2945: Make the distutils upload command aware of bdist_rpm products.
|
||||||
|
|
||||||
- Issue #13712: pysetup create should not convert package_data to extra_files.
|
- Issue #13712: pysetup create should not convert package_data to extra_files.
|
||||||
|
|
Loading…
Reference in New Issue