bpo-25996: Added support for file descriptors in os.scandir() on Unix. (#502)
os.fwalk() is now twice as fast because it uses os.scandir().
This commit is contained in:
parent
0a58f72762
commit
ea720fe7e9
|
@ -2029,6 +2029,9 @@ features:
|
|||
attributes of each :class:`os.DirEntry` will be ``bytes``; in all other
|
||||
circumstances, they will be of type ``str``.
|
||||
|
||||
This function can also support :ref:`specifying a file descriptor
|
||||
<path_fd>`; the file descriptor must refer to a directory.
|
||||
|
||||
The :func:`scandir` iterator supports the :term:`context manager` protocol
|
||||
and has the following method:
|
||||
|
||||
|
@ -2075,6 +2078,9 @@ features:
|
|||
|
||||
The function accepts a :term:`path-like object`.
|
||||
|
||||
.. versionchanged:: 3.7
|
||||
Added support for :ref:`file descriptors <path_fd>` on Unix.
|
||||
|
||||
|
||||
.. class:: DirEntry
|
||||
|
||||
|
@ -2114,7 +2120,9 @@ features:
|
|||
The entry's full path name: equivalent to ``os.path.join(scandir_path,
|
||||
entry.name)`` where *scandir_path* is the :func:`scandir` *path*
|
||||
argument. The path is only absolute if the :func:`scandir` *path*
|
||||
argument was absolute.
|
||||
argument was absolute. If the :func:`scandir` *path*
|
||||
argument was a :ref:`file descriptor <path_fd>`, the :attr:`path`
|
||||
attribute is the same as the :attr:`name` attribute.
|
||||
|
||||
The :attr:`path` attribute will be ``bytes`` if the :func:`scandir`
|
||||
*path* argument is of type ``bytes`` and ``str`` otherwise. Use
|
||||
|
|
|
@ -108,6 +108,9 @@ os
|
|||
Added support for :class:`bytes` paths in :func:`~os.fwalk`. (Contributed by
|
||||
Serhiy Storchaka in :issue:`28682`.)
|
||||
|
||||
Added support for :ref:`file descriptors <path_fd>` in :func:`~os.scandir`
|
||||
on Unix. (Contributed by Serhiy Storchaka in :issue:`25996`.)
|
||||
|
||||
unittest.mock
|
||||
-------------
|
||||
|
||||
|
@ -148,6 +151,10 @@ Optimizations
|
|||
:func:`~math.erfc` in the :mod:`math` module. (Contributed by Serhiy
|
||||
Storchaka in :issue:`26121`.)
|
||||
|
||||
* The :func:`os.fwalk` function is now twice as fast. This was achieved by
|
||||
using the :func:`os.scandir` function.
|
||||
(Contributed by Serhiy Storchaka in :issue:`25996`.)
|
||||
|
||||
|
||||
Build and C API Changes
|
||||
=======================
|
||||
|
|
38
Lib/os.py
38
Lib/os.py
|
@ -129,6 +129,7 @@ if _exists("_have_functions"):
|
|||
_add("HAVE_FCHMOD", "chmod")
|
||||
_add("HAVE_FCHOWN", "chown")
|
||||
_add("HAVE_FDOPENDIR", "listdir")
|
||||
_add("HAVE_FDOPENDIR", "scandir")
|
||||
_add("HAVE_FEXECVE", "execve")
|
||||
_set.add(stat) # fstat always works
|
||||
_add("HAVE_FTRUNCATE", "truncate")
|
||||
|
@ -416,7 +417,7 @@ def walk(top, topdown=True, onerror=None, followlinks=False):
|
|||
|
||||
__all__.append("walk")
|
||||
|
||||
if {open, stat} <= supports_dir_fd and {listdir, stat} <= supports_fd:
|
||||
if {open, stat} <= supports_dir_fd and {scandir, stat} <= supports_fd:
|
||||
|
||||
def fwalk(top=".", topdown=True, onerror=None, *, follow_symlinks=False, dir_fd=None):
|
||||
"""Directory tree generator.
|
||||
|
@ -455,6 +456,7 @@ if {open, stat} <= supports_dir_fd and {listdir, stat} <= supports_fd:
|
|||
top = fspath(top)
|
||||
# Note: To guard against symlink races, we use the standard
|
||||
# lstat()/open()/fstat() trick.
|
||||
if not follow_symlinks:
|
||||
orig_st = stat(top, follow_symlinks=False, dir_fd=dir_fd)
|
||||
topfd = open(top, O_RDONLY, dir_fd=dir_fd)
|
||||
try:
|
||||
|
@ -470,35 +472,41 @@ if {open, stat} <= supports_dir_fd and {listdir, stat} <= supports_fd:
|
|||
# necessary, it can be adapted to only require O(1) FDs, see issue
|
||||
# #13734.
|
||||
|
||||
names = listdir(topfd)
|
||||
scandir_it = scandir(topfd)
|
||||
dirs = []
|
||||
nondirs = []
|
||||
entries = None if topdown or follow_symlinks else []
|
||||
for entry in scandir_it:
|
||||
name = entry.name
|
||||
if isbytes:
|
||||
names = map(fsencode, names)
|
||||
dirs, nondirs = [], []
|
||||
for name in names:
|
||||
name = fsencode(name)
|
||||
try:
|
||||
# Here, we don't use AT_SYMLINK_NOFOLLOW to be consistent with
|
||||
# walk() which reports symlinks to directories as directories.
|
||||
# We do however check for symlinks before recursing into
|
||||
# a subdirectory.
|
||||
if st.S_ISDIR(stat(name, dir_fd=topfd).st_mode):
|
||||
if entry.is_dir():
|
||||
dirs.append(name)
|
||||
if entries is not None:
|
||||
entries.append(entry)
|
||||
else:
|
||||
nondirs.append(name)
|
||||
except OSError:
|
||||
try:
|
||||
# Add dangling symlinks, ignore disappeared files
|
||||
if st.S_ISLNK(stat(name, dir_fd=topfd, follow_symlinks=False)
|
||||
.st_mode):
|
||||
if entry.is_symlink():
|
||||
nondirs.append(name)
|
||||
except OSError:
|
||||
continue
|
||||
pass
|
||||
|
||||
if topdown:
|
||||
yield toppath, dirs, nondirs, topfd
|
||||
|
||||
for name in dirs:
|
||||
for name in dirs if entries is None else zip(dirs, entries):
|
||||
try:
|
||||
orig_st = stat(name, dir_fd=topfd, follow_symlinks=follow_symlinks)
|
||||
if not follow_symlinks:
|
||||
if topdown:
|
||||
orig_st = stat(name, dir_fd=topfd, follow_symlinks=False)
|
||||
else:
|
||||
assert entries is not None
|
||||
name, entry = name
|
||||
orig_st = entry.stat(follow_symlinks=False)
|
||||
dirfd = open(name, O_RDONLY, dir_fd=topfd)
|
||||
except OSError as err:
|
||||
if onerror is not None:
|
||||
|
|
|
@ -3313,6 +3313,35 @@ class TestScandir(unittest.TestCase):
|
|||
self.assertEqual(entry.path,
|
||||
os.fsencode(os.path.join(self.path, 'file.txt')))
|
||||
|
||||
@unittest.skipUnless(os.listdir in os.supports_fd,
|
||||
'fd support for listdir required for this test.')
|
||||
def test_fd(self):
|
||||
self.assertIn(os.scandir, os.supports_fd)
|
||||
self.create_file('file.txt')
|
||||
expected_names = ['file.txt']
|
||||
if support.can_symlink():
|
||||
os.symlink('file.txt', os.path.join(self.path, 'link'))
|
||||
expected_names.append('link')
|
||||
|
||||
fd = os.open(self.path, os.O_RDONLY)
|
||||
try:
|
||||
with os.scandir(fd) as it:
|
||||
entries = list(it)
|
||||
names = [entry.name for entry in entries]
|
||||
self.assertEqual(sorted(names), expected_names)
|
||||
self.assertEqual(names, os.listdir(fd))
|
||||
for entry in entries:
|
||||
self.assertEqual(entry.path, entry.name)
|
||||
self.assertEqual(os.fspath(entry), entry.name)
|
||||
self.assertEqual(entry.is_symlink(), entry.name == 'link')
|
||||
if os.stat in os.supports_dir_fd:
|
||||
st = os.stat(entry.name, dir_fd=fd)
|
||||
self.assertEqual(entry.stat(), st)
|
||||
st = os.stat(entry.name, dir_fd=fd, follow_symlinks=False)
|
||||
self.assertEqual(entry.stat(follow_symlinks=False), st)
|
||||
finally:
|
||||
os.close(fd)
|
||||
|
||||
def test_empty_path(self):
|
||||
self.assertRaises(FileNotFoundError, os.scandir, '')
|
||||
|
||||
|
@ -3328,7 +3357,7 @@ class TestScandir(unittest.TestCase):
|
|||
self.assertEqual(len(entries2), 0, entries2)
|
||||
|
||||
def test_bad_path_type(self):
|
||||
for obj in [1234, 1.234, {}, []]:
|
||||
for obj in [1.234, {}, []]:
|
||||
self.assertRaises(TypeError, os.scandir, obj)
|
||||
|
||||
def test_close(self):
|
||||
|
|
|
@ -294,6 +294,9 @@ Extension Modules
|
|||
Library
|
||||
-------
|
||||
|
||||
- bpo-25996: Added support for file descriptors in os.scandir() on Unix.
|
||||
os.fwalk() is now twice as fast because it uses os.scandir().
|
||||
|
||||
- bpo-28699: Fixed a bug in pools in multiprocessing.pool where raising an
|
||||
exception at the very start of an iterable may swallow the exception or
|
||||
make the program hang. Patch by Davin Potts and Xiang Zhang.
|
||||
|
|
|
@ -5926,7 +5926,7 @@ os_scandir(PyObject *module, PyObject **args, Py_ssize_t nargs, PyObject *kwname
|
|||
PyObject *return_value = NULL;
|
||||
static const char * const _keywords[] = {"path", NULL};
|
||||
static _PyArg_Parser _parser = {"|O&:scandir", _keywords, 0};
|
||||
path_t path = PATH_T_INITIALIZE("scandir", "path", 1, 0);
|
||||
path_t path = PATH_T_INITIALIZE("scandir", "path", 1, PATH_HAVE_FDOPENDIR);
|
||||
|
||||
if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser,
|
||||
path_converter, &path)) {
|
||||
|
@ -6493,4 +6493,4 @@ exit:
|
|||
#ifndef OS_GETRANDOM_METHODDEF
|
||||
#define OS_GETRANDOM_METHODDEF
|
||||
#endif /* !defined(OS_GETRANDOM_METHODDEF) */
|
||||
/*[clinic end generated code: output=5a0be969e3f71660 input=a9049054013a1b77]*/
|
||||
/*[clinic end generated code: output=5529857101c08b49 input=a9049054013a1b77]*/
|
||||
|
|
|
@ -11161,6 +11161,7 @@ typedef struct {
|
|||
unsigned char d_type;
|
||||
#endif
|
||||
ino_t d_ino;
|
||||
int dir_fd;
|
||||
#endif
|
||||
} DirEntry;
|
||||
|
||||
|
@ -11210,19 +11211,31 @@ DirEntry_fetch_stat(DirEntry *self, int follow_symlinks)
|
|||
PyObject *ub;
|
||||
|
||||
#ifdef MS_WINDOWS
|
||||
if (PyUnicode_FSDecoder(self->path, &ub)) {
|
||||
if (!PyUnicode_FSDecoder(self->path, &ub))
|
||||
return NULL;
|
||||
const wchar_t *path = PyUnicode_AsUnicode(ub);
|
||||
#else /* POSIX */
|
||||
if (PyUnicode_FSConverter(self->path, &ub)) {
|
||||
if (!PyUnicode_FSConverter(self->path, &ub))
|
||||
return NULL;
|
||||
const char *path = PyBytes_AS_STRING(ub);
|
||||
if (self->dir_fd != DEFAULT_DIR_FD) {
|
||||
#ifdef HAVE_FSTATAT
|
||||
result = fstatat(self->dir_fd, path, &st,
|
||||
follow_symlinks ? 0 : AT_SYMLINK_NOFOLLOW);
|
||||
#else
|
||||
PyErr_SetString(PyExc_NotImplementedError, "can't fetch stat");
|
||||
return NULL;
|
||||
#endif /* HAVE_FSTATAT */
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
if (follow_symlinks)
|
||||
result = STAT(path, &st);
|
||||
else
|
||||
result = LSTAT(path, &st);
|
||||
}
|
||||
Py_DECREF(ub);
|
||||
} else
|
||||
return NULL;
|
||||
|
||||
if (result != 0)
|
||||
return path_object_error(self->path);
|
||||
|
@ -11633,20 +11646,36 @@ DirEntry_from_posix_info(path_t *path, const char *name, Py_ssize_t name_len,
|
|||
entry->stat = NULL;
|
||||
entry->lstat = NULL;
|
||||
|
||||
if (path->fd != -1) {
|
||||
entry->dir_fd = path->fd;
|
||||
joined_path = NULL;
|
||||
}
|
||||
else {
|
||||
entry->dir_fd = DEFAULT_DIR_FD;
|
||||
joined_path = join_path_filename(path->narrow, name, name_len);
|
||||
if (!joined_path)
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (!path->narrow || !PyBytes_Check(path->object)) {
|
||||
entry->name = PyUnicode_DecodeFSDefaultAndSize(name, name_len);
|
||||
if (joined_path)
|
||||
entry->path = PyUnicode_DecodeFSDefault(joined_path);
|
||||
}
|
||||
else {
|
||||
entry->name = PyBytes_FromStringAndSize(name, name_len);
|
||||
if (joined_path)
|
||||
entry->path = PyBytes_FromString(joined_path);
|
||||
}
|
||||
PyMem_Free(joined_path);
|
||||
if (!entry->name || !entry->path)
|
||||
if (!entry->name)
|
||||
goto error;
|
||||
|
||||
if (path->fd != -1) {
|
||||
entry->path = entry->name;
|
||||
Py_INCREF(entry->path);
|
||||
}
|
||||
else if (!entry->path)
|
||||
goto error;
|
||||
|
||||
#ifdef HAVE_DIRENT_D_TYPE
|
||||
|
@ -11674,6 +11703,9 @@ typedef struct {
|
|||
#else /* POSIX */
|
||||
DIR *dirp;
|
||||
#endif
|
||||
#ifdef HAVE_FDOPENDIR
|
||||
int fd;
|
||||
#endif
|
||||
} ScandirIterator;
|
||||
|
||||
#ifdef MS_WINDOWS
|
||||
|
@ -11758,6 +11790,10 @@ ScandirIterator_closedir(ScandirIterator *iterator)
|
|||
|
||||
iterator->dirp = NULL;
|
||||
Py_BEGIN_ALLOW_THREADS
|
||||
#ifdef HAVE_FDOPENDIR
|
||||
if (iterator->path.fd != -1)
|
||||
rewinddir(dirp);
|
||||
#endif
|
||||
closedir(dirp);
|
||||
Py_END_ALLOW_THREADS
|
||||
return;
|
||||
|
@ -11933,7 +11969,7 @@ static PyTypeObject ScandirIteratorType = {
|
|||
/*[clinic input]
|
||||
os.scandir
|
||||
|
||||
path : path_t(nullable=True) = None
|
||||
path : path_t(nullable=True, allow_fd='PATH_HAVE_FDOPENDIR') = None
|
||||
|
||||
Return an iterator of DirEntry objects for given path.
|
||||
|
||||
|
@ -11946,13 +11982,16 @@ If path is None, uses the path='.'.
|
|||
|
||||
static PyObject *
|
||||
os_scandir_impl(PyObject *module, path_t *path)
|
||||
/*[clinic end generated code: output=6eb2668b675ca89e input=e62b08b3cd41f604]*/
|
||||
/*[clinic end generated code: output=6eb2668b675ca89e input=b139dc1c57f60846]*/
|
||||
{
|
||||
ScandirIterator *iterator;
|
||||
#ifdef MS_WINDOWS
|
||||
wchar_t *path_strW;
|
||||
#else
|
||||
const char *path_str;
|
||||
#ifdef HAVE_FDOPENDIR
|
||||
int fd = -1;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
iterator = PyObject_New(ScandirIterator, &ScandirIteratorType);
|
||||
|
@ -11988,18 +12027,40 @@ os_scandir_impl(PyObject *module, path_t *path)
|
|||
goto error;
|
||||
}
|
||||
#else /* POSIX */
|
||||
errno = 0;
|
||||
#ifdef HAVE_FDOPENDIR
|
||||
if (path->fd != -1) {
|
||||
/* closedir() closes the FD, so we duplicate it */
|
||||
fd = _Py_dup(path->fd);
|
||||
if (fd == -1)
|
||||
goto error;
|
||||
|
||||
Py_BEGIN_ALLOW_THREADS
|
||||
iterator->dirp = fdopendir(fd);
|
||||
Py_END_ALLOW_THREADS
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
if (iterator->path.narrow)
|
||||
path_str = iterator->path.narrow;
|
||||
else
|
||||
path_str = ".";
|
||||
|
||||
errno = 0;
|
||||
Py_BEGIN_ALLOW_THREADS
|
||||
iterator->dirp = opendir(path_str);
|
||||
Py_END_ALLOW_THREADS
|
||||
}
|
||||
|
||||
if (!iterator->dirp) {
|
||||
path_error(&iterator->path);
|
||||
#ifdef HAVE_FDOPENDIR
|
||||
if (fd != -1) {
|
||||
Py_BEGIN_ALLOW_THREADS
|
||||
close(fd);
|
||||
Py_END_ALLOW_THREADS
|
||||
}
|
||||
#endif
|
||||
goto error;
|
||||
}
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue