From d4d79bc1ff91b04625c312f0219c89aabcd19ce4 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 4 Nov 2017 14:16:35 +0200 Subject: [PATCH] bpo-28564: Use os.scandir() in shutil.rmtree(). (#4085) This speeds up it to 20-40%. --- Doc/whatsnew/3.7.rst | 9 +++ Lib/shutil.py | 73 +++++++++++-------- Lib/test/test_shutil.py | 2 +- .../2017-10-23-16-22-54.bpo-28564.Tx-l-I.rst | 2 + 4 files changed, 55 insertions(+), 31 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2017-10-23-16-22-54.bpo-28564.Tx-l-I.rst diff --git a/Doc/whatsnew/3.7.rst b/Doc/whatsnew/3.7.rst index eb64c6a2b64..d1792dc97ef 100644 --- a/Doc/whatsnew/3.7.rst +++ b/Doc/whatsnew/3.7.rst @@ -440,6 +440,10 @@ Optimizations using the :func:`os.scandir` function. (Contributed by Serhiy Storchaka in :issue:`25996`.) +* The :func:`shutil.rmtree` function has been sped up to 20--40%. + This was done using the :func:`os.scandir` function. + (Contributed by Serhiy Storchaka in :issue:`28564`.) + * Optimized case-insensitive matching and searching of :mod:`regular expressions `. Searching some patterns can now be up to 20 times faster. (Contributed by Serhiy Storchaka in :issue:`30285`.) @@ -656,6 +660,11 @@ Changes in the Python API * ``repr`` for :class:`datetime.timedelta` has changed to include keyword arguments in the output. (Contributed by Utkarsh Upadhyay in :issue:`30302`.) +* Because :func:`shutil.rmtree` is now implemented using the :func:`os.scandir` + function, the user specified handler *onerror* is now called with the first + argument ``os.scandir`` instead of ``os.listdir`` when listing the direcory + is failed. + Changes in the C API -------------------- diff --git a/Lib/shutil.py b/Lib/shutil.py index 464ee912f5c..3c02776a406 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -362,25 +362,27 @@ def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2, # version vulnerable to race conditions def _rmtree_unsafe(path, onerror): try: - if os.path.islink(path): - # symlinks to directories are forbidden, see bug #1669 - raise OSError("Cannot call rmtree on a symbolic link") + with os.scandir(path) as scandir_it: + entries = list(scandir_it) except OSError: - onerror(os.path.islink, path, sys.exc_info()) - # can't continue even if onerror hook returns - return - names = [] - try: - names = os.listdir(path) - except OSError: - onerror(os.listdir, path, sys.exc_info()) - for name in names: - fullname = os.path.join(path, name) + onerror(os.scandir, path, sys.exc_info()) + entries = [] + for entry in entries: + fullname = entry.path try: - mode = os.lstat(fullname).st_mode + is_dir = entry.is_dir(follow_symlinks=False) except OSError: - mode = 0 - if stat.S_ISDIR(mode): + is_dir = False + if is_dir: + try: + if entry.is_symlink(): + # This can only happen if someone replaces + # a directory with a symlink after the call to + # os.scandir or entry.is_dir above. + raise OSError("Cannot call rmtree on a symbolic link") + except OSError: + onerror(os.path.islink, fullname, sys.exc_info()) + continue _rmtree_unsafe(fullname, onerror) else: try: @@ -394,22 +396,25 @@ def _rmtree_unsafe(path, onerror): # Version using fd-based APIs to protect against races def _rmtree_safe_fd(topfd, path, onerror): - names = [] try: - names = os.listdir(topfd) + with os.scandir(topfd) as scandir_it: + entries = list(scandir_it) except OSError as err: err.filename = path - onerror(os.listdir, path, sys.exc_info()) - for name in names: - fullname = os.path.join(path, name) + onerror(os.scandir, path, sys.exc_info()) + return + for entry in entries: + fullname = os.path.join(path, entry.name) try: - orig_st = os.stat(name, dir_fd=topfd, follow_symlinks=False) - mode = orig_st.st_mode + is_dir = entry.is_dir(follow_symlinks=False) + if is_dir: + orig_st = entry.stat(follow_symlinks=False) + is_dir = stat.S_ISDIR(orig_st.st_mode) except OSError: - mode = 0 - if stat.S_ISDIR(mode): + is_dir = False + if is_dir: try: - dirfd = os.open(name, os.O_RDONLY, dir_fd=topfd) + dirfd = os.open(entry.name, os.O_RDONLY, dir_fd=topfd) except OSError: onerror(os.open, fullname, sys.exc_info()) else: @@ -417,14 +422,14 @@ def _rmtree_safe_fd(topfd, path, onerror): if os.path.samestat(orig_st, os.fstat(dirfd)): _rmtree_safe_fd(dirfd, fullname, onerror) try: - os.rmdir(name, dir_fd=topfd) + os.rmdir(entry.name, dir_fd=topfd) except OSError: onerror(os.rmdir, fullname, sys.exc_info()) else: try: # This can only happen if someone replaces # a directory with a symlink after the call to - # stat.S_ISDIR above. + # os.scandir or stat.S_ISDIR above. raise OSError("Cannot call rmtree on a symbolic " "link") except OSError: @@ -433,13 +438,13 @@ def _rmtree_safe_fd(topfd, path, onerror): os.close(dirfd) else: try: - os.unlink(name, dir_fd=topfd) + os.unlink(entry.name, dir_fd=topfd) except OSError: onerror(os.unlink, fullname, sys.exc_info()) _use_fd_functions = ({os.open, os.stat, os.unlink, os.rmdir} <= os.supports_dir_fd and - os.listdir in os.supports_fd and + os.scandir in os.supports_fd and os.stat in os.supports_follow_symlinks) def rmtree(path, ignore_errors=False, onerror=None): @@ -491,6 +496,14 @@ def rmtree(path, ignore_errors=False, onerror=None): finally: os.close(fd) else: + try: + if os.path.islink(path): + # symlinks to directories are forbidden, see bug #1669 + raise OSError("Cannot call rmtree on a symbolic link") + except OSError: + onerror(os.path.islink, path, sys.exc_info()) + # can't continue even if onerror hook returns + return return _rmtree_unsafe(path, onerror) # Allow introspection of whether or not the hardening against symlink diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py index 3b4891ddcaf..4a72b4a0764 100644 --- a/Lib/test/test_shutil.py +++ b/Lib/test/test_shutil.py @@ -183,7 +183,7 @@ class TestShutil(unittest.TestCase): errors.append(args) shutil.rmtree(filename, onerror=onerror) self.assertEqual(len(errors), 2) - self.assertIs(errors[0][0], os.listdir) + self.assertIs(errors[0][0], os.scandir) self.assertEqual(errors[0][1], filename) self.assertIsInstance(errors[0][2][1], NotADirectoryError) self.assertIn(errors[0][2][1].filename, possible_args) diff --git a/Misc/NEWS.d/next/Library/2017-10-23-16-22-54.bpo-28564.Tx-l-I.rst b/Misc/NEWS.d/next/Library/2017-10-23-16-22-54.bpo-28564.Tx-l-I.rst new file mode 100644 index 00000000000..0889119db35 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2017-10-23-16-22-54.bpo-28564.Tx-l-I.rst @@ -0,0 +1,2 @@ +The shutil.rmtree() function has been sped up to 20--40%. This was done +using the os.scandir() function.