bpo-28564: Use os.scandir() in shutil.rmtree(). (#4085)

This speeds up it to 20-40%.
This commit is contained in:
Serhiy Storchaka 2017-11-04 14:16:35 +02:00 committed by GitHub
parent 82cd3cede8
commit d4d79bc1ff
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 55 additions and 31 deletions

View File

@ -440,6 +440,10 @@ Optimizations
using the :func:`os.scandir` function. using the :func:`os.scandir` function.
(Contributed by Serhiy Storchaka in :issue:`25996`.) (Contributed by Serhiy Storchaka in :issue:`25996`.)
* The :func:`shutil.rmtree` function has been sped up to 20--40%.
This was done using the :func:`os.scandir` function.
(Contributed by Serhiy Storchaka in :issue:`28564`.)
* Optimized case-insensitive matching and searching of :mod:`regular * Optimized case-insensitive matching and searching of :mod:`regular
expressions <re>`. Searching some patterns can now be up to 20 times faster. expressions <re>`. Searching some patterns can now be up to 20 times faster.
(Contributed by Serhiy Storchaka in :issue:`30285`.) (Contributed by Serhiy Storchaka in :issue:`30285`.)
@ -656,6 +660,11 @@ Changes in the Python API
* ``repr`` for :class:`datetime.timedelta` has changed to include keyword arguments * ``repr`` for :class:`datetime.timedelta` has changed to include keyword arguments
in the output. (Contributed by Utkarsh Upadhyay in :issue:`30302`.) in the output. (Contributed by Utkarsh Upadhyay in :issue:`30302`.)
* Because :func:`shutil.rmtree` is now implemented using the :func:`os.scandir`
function, the user specified handler *onerror* is now called with the first
argument ``os.scandir`` instead of ``os.listdir`` when listing the direcory
is failed.
Changes in the C API Changes in the C API
-------------------- --------------------

View File

@ -362,25 +362,27 @@ def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
# version vulnerable to race conditions # version vulnerable to race conditions
def _rmtree_unsafe(path, onerror): def _rmtree_unsafe(path, onerror):
try: try:
if os.path.islink(path): with os.scandir(path) as scandir_it:
# symlinks to directories are forbidden, see bug #1669 entries = list(scandir_it)
raise OSError("Cannot call rmtree on a symbolic link")
except OSError: except OSError:
onerror(os.path.islink, path, sys.exc_info()) onerror(os.scandir, path, sys.exc_info())
# can't continue even if onerror hook returns entries = []
return for entry in entries:
names = [] fullname = entry.path
try:
names = os.listdir(path)
except OSError:
onerror(os.listdir, path, sys.exc_info())
for name in names:
fullname = os.path.join(path, name)
try: try:
mode = os.lstat(fullname).st_mode is_dir = entry.is_dir(follow_symlinks=False)
except OSError: except OSError:
mode = 0 is_dir = False
if stat.S_ISDIR(mode): if is_dir:
try:
if entry.is_symlink():
# This can only happen if someone replaces
# a directory with a symlink after the call to
# os.scandir or entry.is_dir above.
raise OSError("Cannot call rmtree on a symbolic link")
except OSError:
onerror(os.path.islink, fullname, sys.exc_info())
continue
_rmtree_unsafe(fullname, onerror) _rmtree_unsafe(fullname, onerror)
else: else:
try: try:
@ -394,22 +396,25 @@ def _rmtree_unsafe(path, onerror):
# Version using fd-based APIs to protect against races # Version using fd-based APIs to protect against races
def _rmtree_safe_fd(topfd, path, onerror): def _rmtree_safe_fd(topfd, path, onerror):
names = []
try: try:
names = os.listdir(topfd) with os.scandir(topfd) as scandir_it:
entries = list(scandir_it)
except OSError as err: except OSError as err:
err.filename = path err.filename = path
onerror(os.listdir, path, sys.exc_info()) onerror(os.scandir, path, sys.exc_info())
for name in names: return
fullname = os.path.join(path, name) for entry in entries:
fullname = os.path.join(path, entry.name)
try: try:
orig_st = os.stat(name, dir_fd=topfd, follow_symlinks=False) is_dir = entry.is_dir(follow_symlinks=False)
mode = orig_st.st_mode if is_dir:
orig_st = entry.stat(follow_symlinks=False)
is_dir = stat.S_ISDIR(orig_st.st_mode)
except OSError: except OSError:
mode = 0 is_dir = False
if stat.S_ISDIR(mode): if is_dir:
try: try:
dirfd = os.open(name, os.O_RDONLY, dir_fd=topfd) dirfd = os.open(entry.name, os.O_RDONLY, dir_fd=topfd)
except OSError: except OSError:
onerror(os.open, fullname, sys.exc_info()) onerror(os.open, fullname, sys.exc_info())
else: else:
@ -417,14 +422,14 @@ def _rmtree_safe_fd(topfd, path, onerror):
if os.path.samestat(orig_st, os.fstat(dirfd)): if os.path.samestat(orig_st, os.fstat(dirfd)):
_rmtree_safe_fd(dirfd, fullname, onerror) _rmtree_safe_fd(dirfd, fullname, onerror)
try: try:
os.rmdir(name, dir_fd=topfd) os.rmdir(entry.name, dir_fd=topfd)
except OSError: except OSError:
onerror(os.rmdir, fullname, sys.exc_info()) onerror(os.rmdir, fullname, sys.exc_info())
else: else:
try: try:
# This can only happen if someone replaces # This can only happen if someone replaces
# a directory with a symlink after the call to # a directory with a symlink after the call to
# stat.S_ISDIR above. # os.scandir or stat.S_ISDIR above.
raise OSError("Cannot call rmtree on a symbolic " raise OSError("Cannot call rmtree on a symbolic "
"link") "link")
except OSError: except OSError:
@ -433,13 +438,13 @@ def _rmtree_safe_fd(topfd, path, onerror):
os.close(dirfd) os.close(dirfd)
else: else:
try: try:
os.unlink(name, dir_fd=topfd) os.unlink(entry.name, dir_fd=topfd)
except OSError: except OSError:
onerror(os.unlink, fullname, sys.exc_info()) onerror(os.unlink, fullname, sys.exc_info())
_use_fd_functions = ({os.open, os.stat, os.unlink, os.rmdir} <= _use_fd_functions = ({os.open, os.stat, os.unlink, os.rmdir} <=
os.supports_dir_fd and os.supports_dir_fd and
os.listdir in os.supports_fd and os.scandir in os.supports_fd and
os.stat in os.supports_follow_symlinks) os.stat in os.supports_follow_symlinks)
def rmtree(path, ignore_errors=False, onerror=None): def rmtree(path, ignore_errors=False, onerror=None):
@ -491,6 +496,14 @@ def rmtree(path, ignore_errors=False, onerror=None):
finally: finally:
os.close(fd) os.close(fd)
else: else:
try:
if os.path.islink(path):
# symlinks to directories are forbidden, see bug #1669
raise OSError("Cannot call rmtree on a symbolic link")
except OSError:
onerror(os.path.islink, path, sys.exc_info())
# can't continue even if onerror hook returns
return
return _rmtree_unsafe(path, onerror) return _rmtree_unsafe(path, onerror)
# Allow introspection of whether or not the hardening against symlink # Allow introspection of whether or not the hardening against symlink

View File

@ -183,7 +183,7 @@ class TestShutil(unittest.TestCase):
errors.append(args) errors.append(args)
shutil.rmtree(filename, onerror=onerror) shutil.rmtree(filename, onerror=onerror)
self.assertEqual(len(errors), 2) self.assertEqual(len(errors), 2)
self.assertIs(errors[0][0], os.listdir) self.assertIs(errors[0][0], os.scandir)
self.assertEqual(errors[0][1], filename) self.assertEqual(errors[0][1], filename)
self.assertIsInstance(errors[0][2][1], NotADirectoryError) self.assertIsInstance(errors[0][2][1], NotADirectoryError)
self.assertIn(errors[0][2][1].filename, possible_args) self.assertIn(errors[0][2][1].filename, possible_args)

View File

@ -0,0 +1,2 @@
The shutil.rmtree() function has been sped up to 20--40%. This was done
using the os.scandir() function.