From 6036e4431dbc45952550c2b730fc0d1c82e30883 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Sun, 8 Mar 2015 01:58:04 +0100 Subject: [PATCH] Issue #22524: New os.scandir() function, part of the PEP 471: "os.scandir() function -- a better and faster directory iterator". Patch written by Ben Hoyt. --- Doc/library/os.rst | 177 +++++++++ Doc/whatsnew/3.5.rst | 19 + Lib/test/test_os.py | 224 ++++++++++++ Misc/NEWS | 4 + Modules/posixmodule.c | 818 ++++++++++++++++++++++++++++++++++++++++-- 5 files changed, 1222 insertions(+), 20 deletions(-) diff --git a/Doc/library/os.rst b/Doc/library/os.rst index 2467c60daea..0014b6c3121 100644 --- a/Doc/library/os.rst +++ b/Doc/library/os.rst @@ -1601,6 +1601,11 @@ features: Availability: Unix, Windows. + .. seealso:: + + The :func:`scandir` function returns the directory entries with more + information than just the name. + .. versionchanged:: 3.2 The *path* parameter became optional. @@ -1893,6 +1898,178 @@ features: The *dir_fd* parameter. +.. function:: scandir(path='.') + + Return an iterator of :class:`DirEntry` objects corresponding to the entries + in the directory given by *path*. The entries are yielded in arbitrary + order, and the special entries ``'.'`` and ``'..'`` are not included. + + On Windows, *path* must of type :class:`str`. On POSIX, *path* can be of + type :class:`str` or :class:`bytes`. If *path* is of type :class:`bytes`, + the :attr:`~DirEntry.name` and :attr:`~DirEntry.path` attributes of + :class:`DirEntry` are also of type ``bytes``. Use :func:`~os.fsencode` and + :func:`~os.fsdecode` to encode and decode paths. + + The :func:`scandir` function is recommended, instead of :func:`listdir`, + when the file type of entries is used. In most cases, the file type of a + :class:`DirEntry` is retrieved directly by :func:`scandir`, no system call + is required. If only the name of entries is used, :func:`listdir` can + be more efficient than :func:`scandir`. + + The following example shows a simple use of :func:`scandir` to display all + the files excluding directories in the given *path* that don't start with + ``'.'``:: + + for entry in os.scandir(path): + if not entry.name.startswith('.') and entry.is_file(): + print(entry.name) + + .. note:: + + On Unix-based systems, :func:`scandir` uses the system's + `opendir() `_ + and + `readdir() `_ + functions. On Windows, it uses the Win32 + `FindFirstFileW `_ + and + `FindNextFileW `_ + functions. + + .. seealso:: + + The :func:`listdir` function returns the names of the directory entries. + + .. versionadded:: 3.5 + + +.. class:: DirEntry + + Object yielded by :func:`scandir` to expose the file path and other file + attributes of a directory entry. + + :func:`scandir` will provide as much of this information as possible without + making additional system calls. When a ``stat()`` or ``lstat()`` system call + is made, the ``DirEntry`` object cache the result . + + ``DirEntry`` instances are not intended to be stored in long-lived data + structures; if you know the file metadata has changed or if a long time has + elapsed since calling :func:`scandir`, call ``os.stat(entry.path)`` to fetch + up-to-date information. + + Because the ``DirEntry`` methods can make operating system calls, they may + also raise :exc:`OSError`. For example, if a file is deleted between calling + :func:`scandir` and calling :func:`DirEntry.stat`, a + :exc:`FileNotFoundError` exception can be raised. Unfortunately, the + behaviour on errors depends on the platform. If you need very fine-grained + control over errors, you can catch :exc:`OSError` when calling one of the + ``DirEntry`` methods and handle as appropriate. + + Attributes and methods on a ``DirEntry`` instance are as follows: + + .. attribute:: name + + The entry's base filename, relative to the :func:`scandir` *path* + argument. + + The :attr:`name` type is :class:`str`. On POSIX, it can be of type + :class:`bytes` if the type of the :func:`scandir` *path* argument is also + :class:`bytes`. Use :func:`~os.fsdecode` to decode the name. + + .. attribute:: path + + The entry's full path name: equivalent to ``os.path.join(scandir_path, + entry.name)`` where *scandir_path* is the :func:`scandir` *path* + argument. The path is only absolute if the :func:`scandir` *path* + argument is absolute. + + The :attr:`name` type is :class:`str`. On POSIX, it can be of type + :class:`bytes` if the type of the :func:`scandir` *path* argument is also + :class:`bytes`. Use :func:`~os.fsdecode` to decode the path. + + .. method:: inode() + + Return the inode number of the entry. + + The result is cached in the object, use ``os.stat(entry.path, + follow_symlinks=False).st_ino`` to fetch up-to-date information. + + On POSIX, no system call is required. + + .. method:: is_dir(\*, follow_symlinks=True) + + If *follow_symlinks* is ``True`` (the default), return ``True`` if the + entry is a directory or a symbolic link pointing to a directory, + return ``False`` if it points to another kind of file, if it doesn't + exist anymore or if it is a broken symbolic link. + + If *follow_symlinks* is ``False``, return ``True`` only if this entry + is a directory, return ``False`` if it points to a symbolic link or + another kind of file, if the entry doesn't exist anymore or if it is a + broken symbolic link + + The result is cached in the object. Call :func:`stat.S_ISDIR` with + :func:`os.stat` to fetch up-to-date information. + + The method can raise :exc:`OSError`, such as :exc:`PermissionError`, + but :exc:`FileNotFoundError` is catched. + + In most cases, no system call is required. + + .. method:: is_file(\*, follow_symlinks=True) + + If *follow_symlinks* is ``True`` (the default), return ``True`` if the + entry is a regular file or a symbolic link pointing to a regular file, + return ``False`` if it points to another kind of file, if it doesn't + exist anymore or if it is a broken symbolic link. + + If *follow_symlinks* is ``False``, return ``True`` only if this entry + is a regular file, return ``False`` if it points to a symbolic link or + another kind of file, if it doesn't exist anymore or if it is a broken + symbolic link. + + The result is cached in the object. Call :func:`stat.S_ISREG` with + :func:`os.stat` to fetch up-to-date information. + + The method can raise :exc:`OSError`, such as :exc:`PermissionError`, + but :exc:`FileNotFoundError` is catched. + + In most cases, no system call is required. + + .. method:: is_symlink() + + Return ``True`` if this entry is a symbolic link or a broken symbolic + link, return ``False`` if it points to a another kind of file or if the + entry doesn't exist anymore. + + The result is cached in the object. Call :func:`os.path.islink` to fetch + up-to-date information. + + The method can raise :exc:`OSError`, such as :exc:`PermissionError`, + but :exc:`FileNotFoundError` is catched. + + In most cases, no system call is required. + + .. method:: stat(\*, follow_symlinks=True) + + Return a :class:`stat_result` object for this entry. This function + normally follows symbolic links; to stat a symbolic link add the + argument ``follow_symlinks=False``. + + On Windows, the ``st_ino``, ``st_dev`` and ``st_nlink`` attributes of the + :class:`stat_result` are always set to zero. Call :func:`os.stat` to + get these attributes. + + The result is cached in the object. Call :func:`os.stat` to fetch + up-to-date information. + + On Windows, ``DirEntry.stat(follow_symlinks=False)`` doesn't require a + system call. ``DirEntry.stat()`` requires a system call if the entry is a + symbolic link. + + .. versionadded:: 3.5 + + .. function:: stat(path, \*, dir_fd=None, follow_symlinks=True) Get the status of a file or a file descriptor. Perform the equivalent of a diff --git a/Doc/whatsnew/3.5.rst b/Doc/whatsnew/3.5.rst index 8027fa38981..6932fcb866b 100644 --- a/Doc/whatsnew/3.5.rst +++ b/Doc/whatsnew/3.5.rst @@ -111,6 +111,19 @@ Please read on for a comprehensive list of user-facing changes. PEP written by Carl Meyer + +PEP 471 - os.scandir() function -- a better and faster directory iterator +------------------------------------------------------------------------- + +:pep:`471` includes a new directory iteration function, :func:`os.scandir`, +in the standard library. + +.. seealso:: + + :pep:`471` -- os.scandir() function -- a better and faster directory + iterator. + + PEP 475: Retry system calls failing with EINTR ---------------------------------------------- @@ -118,6 +131,8 @@ PEP 475: Retry system calls failing with EINTR this means that user code doesn't have to deal with EINTR or InterruptedError manually, and should make it more robust against asynchronous signal reception. +PEP and implementation written by Ben Hoyt with the help of Victor Stinner. + .. seealso:: :pep:`475` -- Retry system calls failing with EINTR @@ -279,6 +294,10 @@ json os -- +* New :func:`os.scandir` function: Return an iterator of :class:`os.DirEntry` + objects corresponding to the entries in the directory given by *path*. + (Implementation written by Ben Hoyt with the help of Victor Stinner.) + * :class:`os.stat_result` now has a :attr:`~os.stat_result.st_file_attributes` attribute on Windows. (Contributed by Ben Hoyt in :issue:`21719`.) diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py index afa7a4a1d1c..e5996c4a763 100644 --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -2698,5 +2698,229 @@ class ExportsTests(unittest.TestCase): self.assertIn('walk', os.__all__) +class TestScandir(unittest.TestCase): + def setUp(self): + self.path = os.path.realpath(support.TESTFN) + self.addCleanup(support.rmtree, self.path) + os.mkdir(self.path) + + def create_file(self, name="file.txt"): + filename = os.path.join(self.path, name) + with open(filename, "wb") as fp: + fp.write(b'python') + return filename + + def get_entries(self, names): + entries = dict((entry.name, entry) + for entry in os.scandir(self.path)) + self.assertEqual(sorted(entries.keys()), names) + return entries + + def assert_stat_equal(self, stat1, stat2, skip_fields): + if skip_fields: + for attr in dir(stat1): + if not attr.startswith("st_"): + continue + if attr in ("st_dev", "st_ino", "st_nlink"): + continue + self.assertEqual(getattr(stat1, attr), + getattr(stat2, attr), + (stat1, stat2, attr)) + else: + self.assertEqual(stat1, stat2) + + def check_entry(self, entry, name, is_dir, is_file, is_symlink): + self.assertEqual(entry.name, name) + self.assertEqual(entry.path, os.path.join(self.path, name)) + self.assertEqual(entry.inode(), + os.stat(entry.path, follow_symlinks=False).st_ino) + + entry_stat = os.stat(entry.path) + self.assertEqual(entry.is_dir(), + stat.S_ISDIR(entry_stat.st_mode)) + self.assertEqual(entry.is_file(), + stat.S_ISREG(entry_stat.st_mode)) + self.assertEqual(entry.is_symlink(), + os.path.islink(entry.path)) + + entry_lstat = os.stat(entry.path, follow_symlinks=False) + self.assertEqual(entry.is_dir(follow_symlinks=False), + stat.S_ISDIR(entry_lstat.st_mode)) + self.assertEqual(entry.is_file(follow_symlinks=False), + stat.S_ISREG(entry_lstat.st_mode)) + + self.assert_stat_equal(entry.stat(), + entry_stat, + os.name == 'nt' and not is_symlink) + self.assert_stat_equal(entry.stat(follow_symlinks=False), + entry_lstat, + os.name == 'nt') + + def test_attributes(self): + link = hasattr(os, 'link') + symlink = support.can_symlink() + + dirname = os.path.join(self.path, "dir") + os.mkdir(dirname) + filename = self.create_file("file.txt") + if link: + os.link(filename, os.path.join(self.path, "link_file.txt")) + if symlink: + os.symlink(dirname, os.path.join(self.path, "symlink_dir"), + target_is_directory=True) + os.symlink(filename, os.path.join(self.path, "symlink_file.txt")) + + names = ['dir', 'file.txt'] + if link: + names.append('link_file.txt') + if symlink: + names.extend(('symlink_dir', 'symlink_file.txt')) + entries = self.get_entries(names) + + entry = entries['dir'] + self.check_entry(entry, 'dir', True, False, False) + + entry = entries['file.txt'] + self.check_entry(entry, 'file.txt', False, True, False) + + if link: + entry = entries['link_file.txt'] + self.check_entry(entry, 'link_file.txt', False, True, False) + + if symlink: + entry = entries['symlink_dir'] + self.check_entry(entry, 'symlink_dir', True, False, True) + + entry = entries['symlink_file.txt'] + self.check_entry(entry, 'symlink_file.txt', False, True, True) + + def get_entry(self, name): + entries = list(os.scandir(self.path)) + self.assertEqual(len(entries), 1) + + entry = entries[0] + self.assertEqual(entry.name, name) + return entry + + def create_file_entry(self): + filename = self.create_file() + return self.get_entry(os.path.basename(filename)) + + def test_current_directory(self): + filename = self.create_file() + old_dir = os.getcwd() + try: + os.chdir(self.path) + + # call scandir() without parameter: it must list the content + # of the current directory + entries = dict((entry.name, entry) for entry in os.scandir()) + self.assertEqual(sorted(entries.keys()), + [os.path.basename(filename)]) + finally: + os.chdir(old_dir) + + def test_repr(self): + entry = self.create_file_entry() + self.assertEqual(repr(entry), "") + + def test_removed_dir(self): + path = os.path.join(self.path, 'dir') + + os.mkdir(path) + entry = self.get_entry('dir') + os.rmdir(path) + + # On POSIX, is_dir() result depends if scandir() filled d_type or not + if os.name == 'nt': + self.assertTrue(entry.is_dir()) + self.assertFalse(entry.is_file()) + self.assertFalse(entry.is_symlink()) + if os.name == 'nt': + self.assertRaises(FileNotFoundError, entry.inode) + # don't fail + entry.stat() + entry.stat(follow_symlinks=False) + else: + self.assertGreater(entry.inode(), 0) + self.assertRaises(FileNotFoundError, entry.stat) + self.assertRaises(FileNotFoundError, entry.stat, follow_symlinks=False) + + def test_removed_file(self): + entry = self.create_file_entry() + os.unlink(entry.path) + + self.assertFalse(entry.is_dir()) + # On POSIX, is_dir() result depends if scandir() filled d_type or not + if os.name == 'nt': + self.assertTrue(entry.is_file()) + self.assertFalse(entry.is_symlink()) + if os.name == 'nt': + self.assertRaises(FileNotFoundError, entry.inode) + # don't fail + entry.stat() + entry.stat(follow_symlinks=False) + else: + self.assertGreater(entry.inode(), 0) + self.assertRaises(FileNotFoundError, entry.stat) + self.assertRaises(FileNotFoundError, entry.stat, follow_symlinks=False) + + def test_broken_symlink(self): + if not support.can_symlink(): + return self.skipTest('cannot create symbolic link') + + filename = self.create_file("file.txt") + os.symlink(filename, + os.path.join(self.path, "symlink.txt")) + entries = self.get_entries(['file.txt', 'symlink.txt']) + entry = entries['symlink.txt'] + os.unlink(filename) + + self.assertGreater(entry.inode(), 0) + self.assertFalse(entry.is_dir()) + self.assertFalse(entry.is_file()) # broken symlink returns False + self.assertFalse(entry.is_dir(follow_symlinks=False)) + self.assertFalse(entry.is_file(follow_symlinks=False)) + self.assertTrue(entry.is_symlink()) + self.assertRaises(FileNotFoundError, entry.stat) + # don't fail + entry.stat(follow_symlinks=False) + + def test_bytes(self): + if os.name == "nt": + # On Windows, os.scandir(bytes) must raise an exception + self.assertRaises(TypeError, os.scandir, b'.') + return + + self.create_file("file.txt") + + path_bytes = os.fsencode(self.path) + entries = list(os.scandir(path_bytes)) + self.assertEqual(len(entries), 1, entries) + entry = entries[0] + + self.assertEqual(entry.name, b'file.txt') + self.assertEqual(entry.path, + os.fsencode(os.path.join(self.path, 'file.txt'))) + + def test_empty_path(self): + self.assertRaises(FileNotFoundError, os.scandir, '') + + def test_consume_iterator_twice(self): + self.create_file("file.txt") + iterator = os.scandir(self.path) + + entries = list(iterator) + self.assertEqual(len(entries), 1, entries) + + # check than consuming the iterator twice doesn't raise exception + entries2 = list(iterator) + self.assertEqual(len(entries2), 0, entries2) + + def test_bad_path_type(self): + for obj in [1234, 1.234, {}, []]: + self.assertRaises(TypeError, os.scandir, obj) + + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS b/Misc/NEWS index 4e9b2f03ff4..343cf8f0c26 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -17,6 +17,10 @@ Core and Builtins Library ------- +- Issue #22524: New os.scandir() function, part of the PEP 471: "os.scandir() + function -- a better and faster directory iterator". Patch written by Ben + Hoyt. + - Issue #23103: Reduced the memory consumption of IPv4Address and IPv6Address. - Issue #21793: BaseHTTPRequestHandler again logs response code as numeric, diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 679fc8f3e89..c00113e33e7 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -25,6 +25,7 @@ #define PY_SSIZE_T_CLEAN #include "Python.h" +#include "structmember.h" #ifndef MS_WINDOWS #include "posixmodule.h" #else @@ -372,6 +373,14 @@ static int win32_can_symlink = 0; #define DWORD_MAX 4294967295U +#ifdef MS_WINDOWS +#define INITFUNC PyInit_nt +#define MODNAME "nt" +#else +#define INITFUNC PyInit_posix +#define MODNAME "posix" +#endif + #ifdef MS_WINDOWS /* defined in fileutils.c */ PyAPI_FUNC(void) _Py_time_t_to_FILE_TIME(time_t, int, FILETIME *); @@ -1396,6 +1405,25 @@ attributes_from_dir(LPCSTR pszFile, BY_HANDLE_FILE_INFORMATION *info, ULONG *rep return TRUE; } +static void +find_data_to_file_info_w(WIN32_FIND_DATAW *pFileData, + BY_HANDLE_FILE_INFORMATION *info, + ULONG *reparse_tag) +{ + memset(info, 0, sizeof(*info)); + info->dwFileAttributes = pFileData->dwFileAttributes; + info->ftCreationTime = pFileData->ftCreationTime; + info->ftLastAccessTime = pFileData->ftLastAccessTime; + info->ftLastWriteTime = pFileData->ftLastWriteTime; + info->nFileSizeHigh = pFileData->nFileSizeHigh; + info->nFileSizeLow = pFileData->nFileSizeLow; +/* info->nNumberOfLinks = 1; */ + if (pFileData->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) + *reparse_tag = pFileData->dwReserved0; + else + *reparse_tag = 0; +} + static BOOL attributes_from_dir_w(LPCWSTR pszFile, BY_HANDLE_FILE_INFORMATION *info, ULONG *reparse_tag) { @@ -1405,17 +1433,7 @@ attributes_from_dir_w(LPCWSTR pszFile, BY_HANDLE_FILE_INFORMATION *info, ULONG * if (hFindFile == INVALID_HANDLE_VALUE) return FALSE; FindClose(hFindFile); - memset(info, 0, sizeof(*info)); - *reparse_tag = 0; - info->dwFileAttributes = FileData.dwFileAttributes; - info->ftCreationTime = FileData.ftCreationTime; - info->ftLastAccessTime = FileData.ftLastAccessTime; - info->ftLastWriteTime = FileData.ftLastWriteTime; - info->nFileSizeHigh = FileData.nFileSizeHigh; - info->nFileSizeLow = FileData.nFileSizeLow; -/* info->nNumberOfLinks = 1; */ - if (FileData.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) - *reparse_tag = FileData.dwReserved0; + find_data_to_file_info_w(&FileData, info, reparse_tag); return TRUE; } @@ -16330,6 +16348,766 @@ posix_set_blocking(PyObject *self, PyObject *args) #endif /* !MS_WINDOWS */ +PyDoc_STRVAR(posix_scandir__doc__, +"scandir(path='.') -> iterator of DirEntry objects for given path"); + +static char *follow_symlinks_keywords[] = {"follow_symlinks", NULL}; + +typedef struct { + PyObject_HEAD + PyObject *name; + PyObject *path; + PyObject *stat; + PyObject *lstat; +#ifdef MS_WINDOWS + struct _Py_stat_struct win32_lstat; + __int64 win32_file_index; + int got_file_index; +#else /* POSIX */ + unsigned char d_type; + ino_t d_ino; +#endif +} DirEntry; + +static void +DirEntry_dealloc(DirEntry *entry) +{ + Py_XDECREF(entry->name); + Py_XDECREF(entry->path); + Py_XDECREF(entry->stat); + Py_XDECREF(entry->lstat); + Py_TYPE(entry)->tp_free((PyObject *)entry); +} + +/* Forward reference */ +static int +DirEntry_test_mode(DirEntry *self, int follow_symlinks, unsigned short mode_bits); + +/* Set exception and return -1 on error, 0 for False, 1 for True */ +static int +DirEntry_is_symlink(DirEntry *self) +{ +#ifdef MS_WINDOWS + return (self->win32_lstat.st_mode & S_IFMT) == S_IFLNK; +#else /* POSIX */ + if (self->d_type != DT_UNKNOWN) + return self->d_type == DT_LNK; + else + return DirEntry_test_mode(self, 0, S_IFLNK); +#endif +} + +static PyObject * +DirEntry_py_is_symlink(DirEntry *self) +{ + int result; + + result = DirEntry_is_symlink(self); + if (result == -1) + return NULL; + return PyBool_FromLong(result); +} + +static PyObject * +DirEntry_fetch_stat(DirEntry *self, int follow_symlinks) +{ + int result; + struct _Py_stat_struct st; + +#ifdef MS_WINDOWS + wchar_t *path; + + path = PyUnicode_AsUnicode(self->path); + if (!path) + return NULL; + + if (follow_symlinks) + result = win32_stat_w(path, &st); + else + result = win32_lstat_w(path, &st); + + if (result != 0) { + return PyErr_SetExcFromWindowsErrWithFilenameObject(PyExc_OSError, + 0, self->path); + } +#else /* POSIX */ + PyObject *bytes; + char *path; + + if (!PyUnicode_FSConverter(self->path, &bytes)) + return NULL; + path = PyBytes_AS_STRING(bytes); + + if (follow_symlinks) + result = STAT(path, &st); + else + result = LSTAT(path, &st); + Py_DECREF(bytes); + + if (result != 0) + return PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, self->path); +#endif + + return _pystat_fromstructstat(&st); +} + +static PyObject * +DirEntry_get_lstat(DirEntry *self) +{ + if (!self->lstat) { +#ifdef MS_WINDOWS + self->lstat = _pystat_fromstructstat(&self->win32_lstat); +#else /* POSIX */ + self->lstat = DirEntry_fetch_stat(self, 0); +#endif + } + Py_XINCREF(self->lstat); + return self->lstat; +} + +static PyObject * +DirEntry_get_stat(DirEntry *self, int follow_symlinks) +{ + if (!follow_symlinks) + return DirEntry_get_lstat(self); + + if (!self->stat) { + int result = DirEntry_is_symlink(self); + if (result == -1) + return NULL; + else if (result) + self->stat = DirEntry_fetch_stat(self, 1); + else + self->stat = DirEntry_get_lstat(self); + } + + Py_XINCREF(self->stat); + return self->stat; +} + +static PyObject * +DirEntry_stat(DirEntry *self, PyObject *args, PyObject *kwargs) +{ + int follow_symlinks = 1; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|$p:DirEntry.stat", + follow_symlinks_keywords, &follow_symlinks)) + return NULL; + + return DirEntry_get_stat(self, follow_symlinks); +} + +/* Set exception and return -1 on error, 0 for False, 1 for True */ +static int +DirEntry_test_mode(DirEntry *self, int follow_symlinks, unsigned short mode_bits) +{ + PyObject *stat = NULL; + PyObject *st_mode = NULL; + long mode; + int result; + int is_symlink; + int need_stat; + _Py_IDENTIFIER(st_mode); +#ifdef MS_WINDOWS + unsigned long dir_bits; +#endif + +#ifdef MS_WINDOWS + is_symlink = (self->win32_lstat.st_mode & S_IFMT) == S_IFLNK; + need_stat = follow_symlinks && is_symlink; +#else /* POSIX */ + is_symlink = self->d_type == DT_LNK; + need_stat = self->d_type == DT_UNKNOWN || (follow_symlinks && is_symlink); +#endif + + if (need_stat) { + stat = DirEntry_get_stat(self, follow_symlinks); + if (!stat) { + if (PyErr_ExceptionMatches(PyExc_FileNotFoundError)) { + /* If file doesn't exist (anymore), then return False + (i.e., say it's not a file/directory) */ + PyErr_Clear(); + return 0; + } + goto error; + } + st_mode = _PyObject_GetAttrId(stat, &PyId_st_mode); + if (!st_mode) + goto error; + + mode = PyLong_AsLong(st_mode); + if (mode == -1 && PyErr_Occurred()) + goto error; + Py_CLEAR(st_mode); + Py_CLEAR(stat); + result = (mode & S_IFMT) == mode_bits; + } + else if (is_symlink) { + assert(mode_bits != S_IFLNK); + result = 0; + } + else { + assert(mode_bits == S_IFDIR || mode_bits == S_IFREG); +#ifdef MS_WINDOWS + dir_bits = self->win32_lstat.st_file_attributes & FILE_ATTRIBUTE_DIRECTORY; + if (mode_bits == S_IFDIR) + result = dir_bits != 0; + else + result = dir_bits == 0; +#else /* POSIX */ + if (mode_bits == S_IFDIR) + result = self->d_type == DT_DIR; + else + result = self->d_type == DT_REG; +#endif + } + + return result; + +error: + Py_XDECREF(st_mode); + Py_XDECREF(stat); + return -1; +} + +static PyObject * +DirEntry_py_test_mode(DirEntry *self, int follow_symlinks, unsigned short mode_bits) +{ + int result; + + result = DirEntry_test_mode(self, follow_symlinks, mode_bits); + if (result == -1) + return NULL; + return PyBool_FromLong(result); +} + +static PyObject * +DirEntry_is_dir(DirEntry *self, PyObject *args, PyObject *kwargs) +{ + int follow_symlinks = 1; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|$p:DirEntry.is_dir", + follow_symlinks_keywords, &follow_symlinks)) + return NULL; + + return DirEntry_py_test_mode(self, follow_symlinks, S_IFDIR); +} + +static PyObject * +DirEntry_is_file(DirEntry *self, PyObject *args, PyObject *kwargs) +{ + int follow_symlinks = 1; + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|$p:DirEntry.is_file", + follow_symlinks_keywords, &follow_symlinks)) + return NULL; + + return DirEntry_py_test_mode(self, follow_symlinks, S_IFREG); +} + +static PyObject * +DirEntry_inode(DirEntry *self) +{ +#ifdef MS_WINDOWS + if (!self->got_file_index) { + wchar_t *path; + struct _Py_stat_struct stat; + + path = PyUnicode_AsUnicode(self->path); + if (!path) + return NULL; + + if (win32_lstat_w(path, &stat) != 0) { + return PyErr_SetExcFromWindowsErrWithFilenameObject(PyExc_OSError, + 0, self->path); + } + + self->win32_file_index = stat.st_ino; + self->got_file_index = 1; + } + return PyLong_FromLongLong((PY_LONG_LONG)self->win32_file_index); +#else /* POSIX */ +#ifdef HAVE_LARGEFILE_SUPPORT + return PyLong_FromLongLong((PY_LONG_LONG)self->d_ino); +#else + return PyLong_FromLong((long)self->d_ino); +#endif +#endif +} + +static PyObject * +DirEntry_repr(DirEntry *self) +{ + return PyUnicode_FromFormat("", self->name); +} + +static PyMemberDef DirEntry_members[] = { + {"name", T_OBJECT_EX, offsetof(DirEntry, name), READONLY, + "the entry's base filename, relative to scandir() \"path\" argument"}, + {"path", T_OBJECT_EX, offsetof(DirEntry, path), READONLY, + "the entry's full path name; equivalent to os.path.join(scandir_path, entry.name)"}, + {NULL} +}; + +static PyMethodDef DirEntry_methods[] = { + {"is_dir", (PyCFunction)DirEntry_is_dir, METH_VARARGS | METH_KEYWORDS, + "return True if the entry is a directory; cached per entry" + }, + {"is_file", (PyCFunction)DirEntry_is_file, METH_VARARGS | METH_KEYWORDS, + "return True if the entry is a file; cached per entry" + }, + {"is_symlink", (PyCFunction)DirEntry_py_is_symlink, METH_NOARGS, + "return True if the entry is a symbolic link; cached per entry" + }, + {"stat", (PyCFunction)DirEntry_stat, METH_VARARGS | METH_KEYWORDS, + "return stat_result object for the entry; cached per entry" + }, + {"inode", (PyCFunction)DirEntry_inode, METH_NOARGS, + "return inode of the entry; cached per entry", + }, + {NULL} +}; + +PyTypeObject DirEntryType = { + PyVarObject_HEAD_INIT(NULL, 0) + MODNAME ".DirEntry", /* tp_name */ + sizeof(DirEntry), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)DirEntry_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + (reprfunc)DirEntry_repr, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + 0, /* tp_iter */ + 0, /* tp_iternext */ + DirEntry_methods, /* tp_methods */ + DirEntry_members, /* tp_members */ +}; + +#ifdef MS_WINDOWS + +static wchar_t * +join_path_filenameW(wchar_t *path_wide, wchar_t* filename) +{ + Py_ssize_t path_len; + Py_ssize_t size; + wchar_t *result; + wchar_t ch; + + if (!path_wide) { /* Default arg: "." */ + path_wide = L"."; + path_len = 1; + } + else { + path_len = wcslen(path_wide); + } + + /* The +1's are for the path separator and the NUL */ + size = path_len + 1 + wcslen(filename) + 1; + result = PyMem_New(wchar_t, size); + if (!result) { + PyErr_NoMemory(); + return NULL; + } + wcscpy(result, path_wide); + if (path_len > 0) { + ch = result[path_len - 1]; + if (ch != SEP && ch != ALTSEP && ch != L':') + result[path_len++] = SEP; + wcscpy(result + path_len, filename); + } + return result; +} + +static PyObject * +DirEntry_from_find_data(path_t *path, WIN32_FIND_DATAW *dataW) +{ + DirEntry *entry; + BY_HANDLE_FILE_INFORMATION file_info; + ULONG reparse_tag; + wchar_t *joined_path; + + entry = PyObject_New(DirEntry, &DirEntryType); + if (!entry) + return NULL; + entry->name = NULL; + entry->path = NULL; + entry->stat = NULL; + entry->lstat = NULL; + entry->got_file_index = 0; + + entry->name = PyUnicode_FromWideChar(dataW->cFileName, -1); + if (!entry->name) + goto error; + + joined_path = join_path_filenameW(path->wide, dataW->cFileName); + if (!joined_path) + goto error; + + entry->path = PyUnicode_FromWideChar(joined_path, -1); + PyMem_Free(joined_path); + if (!entry->path) + goto error; + + find_data_to_file_info_w(dataW, &file_info, &reparse_tag); + _Py_attribute_data_to_stat(&file_info, reparse_tag, &entry->win32_lstat); + + return (PyObject *)entry; + +error: + Py_DECREF(entry); + return NULL; +} + +#else /* POSIX */ + +static char * +join_path_filename(char *path_narrow, char* filename, Py_ssize_t filename_len) +{ + Py_ssize_t path_len; + Py_ssize_t size; + char *result; + + if (!path_narrow) { /* Default arg: "." */ + path_narrow = "."; + path_len = 1; + } + else { + path_len = strlen(path_narrow); + } + + if (filename_len == -1) + filename_len = strlen(filename); + + /* The +1's are for the path separator and the NUL */ + size = path_len + 1 + filename_len + 1; + result = PyMem_New(char, size); + if (!result) { + PyErr_NoMemory(); + return NULL; + } + strcpy(result, path_narrow); + if (path_len > 0 && result[path_len - 1] != '/') + result[path_len++] = '/'; + strcpy(result + path_len, filename); + return result; +} + +static PyObject * +DirEntry_from_posix_info(path_t *path, char *name, Py_ssize_t name_len, + unsigned char d_type, ino_t d_ino) +{ + DirEntry *entry; + char *joined_path; + + entry = PyObject_New(DirEntry, &DirEntryType); + if (!entry) + return NULL; + entry->name = NULL; + entry->path = NULL; + entry->stat = NULL; + entry->lstat = NULL; + + joined_path = join_path_filename(path->narrow, name, name_len); + if (!joined_path) + goto error; + + if (!path->narrow || !PyBytes_Check(path->object)) { + entry->name = PyUnicode_DecodeFSDefaultAndSize(name, name_len); + entry->path = PyUnicode_DecodeFSDefault(joined_path); + } + else { + entry->name = PyBytes_FromStringAndSize(name, name_len); + entry->path = PyBytes_FromString(joined_path); + } + PyMem_Free(joined_path); + if (!entry->name || !entry->path) + goto error; + + entry->d_type = d_type; + entry->d_ino = d_ino; + + return (PyObject *)entry; + +error: + Py_XDECREF(entry); + return NULL; +} + +#endif + + +typedef struct { + PyObject_HEAD + path_t path; +#ifdef MS_WINDOWS + HANDLE handle; + WIN32_FIND_DATAW file_data; + int first_time; +#else /* POSIX */ + DIR *dirp; +#endif +} ScandirIterator; + +#ifdef MS_WINDOWS + +static void +ScandirIterator_close(ScandirIterator *iterator) +{ + if (iterator->handle == INVALID_HANDLE_VALUE) + return; + + Py_BEGIN_ALLOW_THREADS + FindClose(iterator->handle); + Py_END_ALLOW_THREADS + iterator->handle = INVALID_HANDLE_VALUE; +} + +static PyObject * +ScandirIterator_iternext(ScandirIterator *iterator) +{ + WIN32_FIND_DATAW *file_data = &iterator->file_data; + BOOL success; + + /* Happens if the iterator is iterated twice */ + if (iterator->handle == INVALID_HANDLE_VALUE) { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + + while (1) { + if (!iterator->first_time) { + Py_BEGIN_ALLOW_THREADS + success = FindNextFileW(iterator->handle, file_data); + Py_END_ALLOW_THREADS + if (!success) { + if (GetLastError() != ERROR_NO_MORE_FILES) + return path_error(&iterator->path); + /* No more files found in directory, stop iterating */ + break; + } + } + iterator->first_time = 0; + + /* Skip over . and .. */ + if (wcscmp(file_data->cFileName, L".") != 0 && + wcscmp(file_data->cFileName, L"..") != 0) + return DirEntry_from_find_data(&iterator->path, file_data); + + /* Loop till we get a non-dot directory or finish iterating */ + } + + ScandirIterator_close(iterator); + + PyErr_SetNone(PyExc_StopIteration); + return NULL; +} + +#else /* POSIX */ + +static void +ScandirIterator_close(ScandirIterator *iterator) +{ + if (!iterator->dirp) + return; + + Py_BEGIN_ALLOW_THREADS + closedir(iterator->dirp); + Py_END_ALLOW_THREADS + iterator->dirp = NULL; + return; +} + +static PyObject * +ScandirIterator_iternext(ScandirIterator *iterator) +{ + struct dirent *direntp; + Py_ssize_t name_len; + int is_dot; + unsigned char d_type; + + /* Happens if the iterator is iterated twice */ + if (!iterator->dirp) { + PyErr_SetNone(PyExc_StopIteration); + return NULL; + } + + while (1) { + errno = 0; + Py_BEGIN_ALLOW_THREADS + direntp = readdir(iterator->dirp); + Py_END_ALLOW_THREADS + + if (!direntp) { + if (errno != 0) + return path_error(&iterator->path); + /* No more files found in directory, stop iterating */ + break; + } + + /* Skip over . and .. */ + name_len = NAMLEN(direntp); + is_dot = direntp->d_name[0] == '.' && + (name_len == 1 || (direntp->d_name[1] == '.' && name_len == 2)); + if (!is_dot) { +#if defined(__GLIBC__) && !defined(_DIRENT_HAVE_D_TYPE) + d_type = DT_UNKNOWN; /* System doesn't support d_type */ +#else + d_type = direntp->d_type; +#endif + return DirEntry_from_posix_info(&iterator->path, direntp->d_name, + name_len, d_type, direntp->d_ino); + } + + /* Loop till we get a non-dot directory or finish iterating */ + } + + ScandirIterator_close(iterator); + + PyErr_SetNone(PyExc_StopIteration); + return NULL; +} + +#endif + +static void +ScandirIterator_dealloc(ScandirIterator *iterator) +{ + ScandirIterator_close(iterator); + Py_XDECREF(iterator->path.object); + path_cleanup(&iterator->path); + Py_TYPE(iterator)->tp_free((PyObject *)iterator); +} + +PyTypeObject ScandirIteratorType = { + PyVarObject_HEAD_INIT(NULL, 0) + MODNAME ".ScandirIterator", /* tp_name */ + sizeof(ScandirIterator), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)ScandirIterator_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + PyObject_SelfIter, /* tp_iter */ + (iternextfunc)ScandirIterator_iternext, /* tp_iternext */ +}; + +static PyObject * +posix_scandir(PyObject *self, PyObject *args, PyObject *kwargs) +{ + ScandirIterator *iterator; + static char *keywords[] = {"path", NULL}; +#ifdef MS_WINDOWS + wchar_t *path_strW; +#else + char *path; +#endif + + iterator = PyObject_New(ScandirIterator, &ScandirIteratorType); + if (!iterator) + return NULL; + memset(&iterator->path, 0, sizeof(path_t)); + iterator->path.function_name = "scandir"; + iterator->path.nullable = 1; + +#ifdef MS_WINDOWS + iterator->handle = INVALID_HANDLE_VALUE; +#else + iterator->dirp = NULL; +#endif + + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O&:scandir", keywords, + path_converter, &iterator->path)) + goto error; + + /* path_converter doesn't keep path.object around, so do it + manually for the lifetime of the iterator here (the refcount + is decremented in ScandirIterator_dealloc) + */ + Py_XINCREF(iterator->path.object); + +#ifdef MS_WINDOWS + if (iterator->path.narrow) { + PyErr_SetString(PyExc_TypeError, + "os.scandir() doesn't support bytes path on Windows, use Unicode instead"); + goto error; + } + iterator->first_time = 1; + + path_strW = join_path_filenameW(iterator->path.wide, L"*.*"); + if (!path_strW) + goto error; + + Py_BEGIN_ALLOW_THREADS + iterator->handle = FindFirstFileW(path_strW, &iterator->file_data); + Py_END_ALLOW_THREADS + + PyMem_Free(path_strW); + + if (iterator->handle == INVALID_HANDLE_VALUE) { + path_error(&iterator->path); + goto error; + } +#else /* POSIX */ + if (iterator->path.narrow) + path = iterator->path.narrow; + else + path = "."; + + errno = 0; + Py_BEGIN_ALLOW_THREADS + iterator->dirp = opendir(path); + Py_END_ALLOW_THREADS + + if (!iterator->dirp) { + path_error(&iterator->path); + goto error; + } +#endif + + return (PyObject *)iterator; + +error: + Py_DECREF(iterator); + return NULL; +} + + /*[clinic input] dump buffer [clinic start generated code]*/ @@ -17002,6 +17780,9 @@ static PyMethodDef posix_methods[] = { {"get_blocking", posix_get_blocking, METH_VARARGS, get_blocking__doc__}, {"set_blocking", posix_set_blocking, METH_VARARGS, set_blocking__doc__}, #endif + {"scandir", (PyCFunction)posix_scandir, + METH_VARARGS | METH_KEYWORDS, + posix_scandir__doc__}, {NULL, NULL} /* Sentinel */ }; @@ -17444,15 +18225,6 @@ all_ins(PyObject *m) } -#ifdef MS_WINDOWS -#define INITFUNC PyInit_nt -#define MODNAME "nt" - -#else -#define INITFUNC PyInit_posix -#define MODNAME "posix" -#endif - static struct PyModuleDef posixmodule = { PyModuleDef_HEAD_INIT, MODNAME, @@ -17673,6 +18445,12 @@ INITFUNC(void) if (PyStructSequence_InitType2(&TerminalSizeType, &TerminalSize_desc) < 0) return NULL; + + /* initialize scandir types */ + if (PyType_Ready(&ScandirIteratorType) < 0) + return NULL; + if (PyType_Ready(&DirEntryType) < 0) + return NULL; } #if defined(HAVE_WAITID) && !defined(__APPLE__) Py_INCREF((PyObject*) &WaitidResultType);