bpo-28564: Use os.scandir() in shutil.rmtree(). (#4085)
This speeds up it to 20-40%.
This commit is contained in:
parent
82cd3cede8
commit
d4d79bc1ff
|
@ -440,6 +440,10 @@ Optimizations
|
||||||
using the :func:`os.scandir` function.
|
using the :func:`os.scandir` function.
|
||||||
(Contributed by Serhiy Storchaka in :issue:`25996`.)
|
(Contributed by Serhiy Storchaka in :issue:`25996`.)
|
||||||
|
|
||||||
|
* The :func:`shutil.rmtree` function has been sped up to 20--40%.
|
||||||
|
This was done using the :func:`os.scandir` function.
|
||||||
|
(Contributed by Serhiy Storchaka in :issue:`28564`.)
|
||||||
|
|
||||||
* Optimized case-insensitive matching and searching of :mod:`regular
|
* Optimized case-insensitive matching and searching of :mod:`regular
|
||||||
expressions <re>`. Searching some patterns can now be up to 20 times faster.
|
expressions <re>`. Searching some patterns can now be up to 20 times faster.
|
||||||
(Contributed by Serhiy Storchaka in :issue:`30285`.)
|
(Contributed by Serhiy Storchaka in :issue:`30285`.)
|
||||||
|
@ -656,6 +660,11 @@ Changes in the Python API
|
||||||
* ``repr`` for :class:`datetime.timedelta` has changed to include keyword arguments
|
* ``repr`` for :class:`datetime.timedelta` has changed to include keyword arguments
|
||||||
in the output. (Contributed by Utkarsh Upadhyay in :issue:`30302`.)
|
in the output. (Contributed by Utkarsh Upadhyay in :issue:`30302`.)
|
||||||
|
|
||||||
|
* Because :func:`shutil.rmtree` is now implemented using the :func:`os.scandir`
|
||||||
|
function, the user specified handler *onerror* is now called with the first
|
||||||
|
argument ``os.scandir`` instead of ``os.listdir`` when listing the direcory
|
||||||
|
is failed.
|
||||||
|
|
||||||
|
|
||||||
Changes in the C API
|
Changes in the C API
|
||||||
--------------------
|
--------------------
|
||||||
|
|
|
@ -362,25 +362,27 @@ def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
|
||||||
# version vulnerable to race conditions
|
# version vulnerable to race conditions
|
||||||
def _rmtree_unsafe(path, onerror):
|
def _rmtree_unsafe(path, onerror):
|
||||||
try:
|
try:
|
||||||
if os.path.islink(path):
|
with os.scandir(path) as scandir_it:
|
||||||
# symlinks to directories are forbidden, see bug #1669
|
entries = list(scandir_it)
|
||||||
raise OSError("Cannot call rmtree on a symbolic link")
|
|
||||||
except OSError:
|
except OSError:
|
||||||
onerror(os.path.islink, path, sys.exc_info())
|
onerror(os.scandir, path, sys.exc_info())
|
||||||
# can't continue even if onerror hook returns
|
entries = []
|
||||||
return
|
for entry in entries:
|
||||||
names = []
|
fullname = entry.path
|
||||||
try:
|
|
||||||
names = os.listdir(path)
|
|
||||||
except OSError:
|
|
||||||
onerror(os.listdir, path, sys.exc_info())
|
|
||||||
for name in names:
|
|
||||||
fullname = os.path.join(path, name)
|
|
||||||
try:
|
try:
|
||||||
mode = os.lstat(fullname).st_mode
|
is_dir = entry.is_dir(follow_symlinks=False)
|
||||||
except OSError:
|
except OSError:
|
||||||
mode = 0
|
is_dir = False
|
||||||
if stat.S_ISDIR(mode):
|
if is_dir:
|
||||||
|
try:
|
||||||
|
if entry.is_symlink():
|
||||||
|
# This can only happen if someone replaces
|
||||||
|
# a directory with a symlink after the call to
|
||||||
|
# os.scandir or entry.is_dir above.
|
||||||
|
raise OSError("Cannot call rmtree on a symbolic link")
|
||||||
|
except OSError:
|
||||||
|
onerror(os.path.islink, fullname, sys.exc_info())
|
||||||
|
continue
|
||||||
_rmtree_unsafe(fullname, onerror)
|
_rmtree_unsafe(fullname, onerror)
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
|
@ -394,22 +396,25 @@ def _rmtree_unsafe(path, onerror):
|
||||||
|
|
||||||
# Version using fd-based APIs to protect against races
|
# Version using fd-based APIs to protect against races
|
||||||
def _rmtree_safe_fd(topfd, path, onerror):
|
def _rmtree_safe_fd(topfd, path, onerror):
|
||||||
names = []
|
|
||||||
try:
|
try:
|
||||||
names = os.listdir(topfd)
|
with os.scandir(topfd) as scandir_it:
|
||||||
|
entries = list(scandir_it)
|
||||||
except OSError as err:
|
except OSError as err:
|
||||||
err.filename = path
|
err.filename = path
|
||||||
onerror(os.listdir, path, sys.exc_info())
|
onerror(os.scandir, path, sys.exc_info())
|
||||||
for name in names:
|
return
|
||||||
fullname = os.path.join(path, name)
|
for entry in entries:
|
||||||
|
fullname = os.path.join(path, entry.name)
|
||||||
try:
|
try:
|
||||||
orig_st = os.stat(name, dir_fd=topfd, follow_symlinks=False)
|
is_dir = entry.is_dir(follow_symlinks=False)
|
||||||
mode = orig_st.st_mode
|
if is_dir:
|
||||||
|
orig_st = entry.stat(follow_symlinks=False)
|
||||||
|
is_dir = stat.S_ISDIR(orig_st.st_mode)
|
||||||
except OSError:
|
except OSError:
|
||||||
mode = 0
|
is_dir = False
|
||||||
if stat.S_ISDIR(mode):
|
if is_dir:
|
||||||
try:
|
try:
|
||||||
dirfd = os.open(name, os.O_RDONLY, dir_fd=topfd)
|
dirfd = os.open(entry.name, os.O_RDONLY, dir_fd=topfd)
|
||||||
except OSError:
|
except OSError:
|
||||||
onerror(os.open, fullname, sys.exc_info())
|
onerror(os.open, fullname, sys.exc_info())
|
||||||
else:
|
else:
|
||||||
|
@ -417,14 +422,14 @@ def _rmtree_safe_fd(topfd, path, onerror):
|
||||||
if os.path.samestat(orig_st, os.fstat(dirfd)):
|
if os.path.samestat(orig_st, os.fstat(dirfd)):
|
||||||
_rmtree_safe_fd(dirfd, fullname, onerror)
|
_rmtree_safe_fd(dirfd, fullname, onerror)
|
||||||
try:
|
try:
|
||||||
os.rmdir(name, dir_fd=topfd)
|
os.rmdir(entry.name, dir_fd=topfd)
|
||||||
except OSError:
|
except OSError:
|
||||||
onerror(os.rmdir, fullname, sys.exc_info())
|
onerror(os.rmdir, fullname, sys.exc_info())
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
# This can only happen if someone replaces
|
# This can only happen if someone replaces
|
||||||
# a directory with a symlink after the call to
|
# a directory with a symlink after the call to
|
||||||
# stat.S_ISDIR above.
|
# os.scandir or stat.S_ISDIR above.
|
||||||
raise OSError("Cannot call rmtree on a symbolic "
|
raise OSError("Cannot call rmtree on a symbolic "
|
||||||
"link")
|
"link")
|
||||||
except OSError:
|
except OSError:
|
||||||
|
@ -433,13 +438,13 @@ def _rmtree_safe_fd(topfd, path, onerror):
|
||||||
os.close(dirfd)
|
os.close(dirfd)
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
os.unlink(name, dir_fd=topfd)
|
os.unlink(entry.name, dir_fd=topfd)
|
||||||
except OSError:
|
except OSError:
|
||||||
onerror(os.unlink, fullname, sys.exc_info())
|
onerror(os.unlink, fullname, sys.exc_info())
|
||||||
|
|
||||||
_use_fd_functions = ({os.open, os.stat, os.unlink, os.rmdir} <=
|
_use_fd_functions = ({os.open, os.stat, os.unlink, os.rmdir} <=
|
||||||
os.supports_dir_fd and
|
os.supports_dir_fd and
|
||||||
os.listdir in os.supports_fd and
|
os.scandir in os.supports_fd and
|
||||||
os.stat in os.supports_follow_symlinks)
|
os.stat in os.supports_follow_symlinks)
|
||||||
|
|
||||||
def rmtree(path, ignore_errors=False, onerror=None):
|
def rmtree(path, ignore_errors=False, onerror=None):
|
||||||
|
@ -491,6 +496,14 @@ def rmtree(path, ignore_errors=False, onerror=None):
|
||||||
finally:
|
finally:
|
||||||
os.close(fd)
|
os.close(fd)
|
||||||
else:
|
else:
|
||||||
|
try:
|
||||||
|
if os.path.islink(path):
|
||||||
|
# symlinks to directories are forbidden, see bug #1669
|
||||||
|
raise OSError("Cannot call rmtree on a symbolic link")
|
||||||
|
except OSError:
|
||||||
|
onerror(os.path.islink, path, sys.exc_info())
|
||||||
|
# can't continue even if onerror hook returns
|
||||||
|
return
|
||||||
return _rmtree_unsafe(path, onerror)
|
return _rmtree_unsafe(path, onerror)
|
||||||
|
|
||||||
# Allow introspection of whether or not the hardening against symlink
|
# Allow introspection of whether or not the hardening against symlink
|
||||||
|
|
|
@ -183,7 +183,7 @@ class TestShutil(unittest.TestCase):
|
||||||
errors.append(args)
|
errors.append(args)
|
||||||
shutil.rmtree(filename, onerror=onerror)
|
shutil.rmtree(filename, onerror=onerror)
|
||||||
self.assertEqual(len(errors), 2)
|
self.assertEqual(len(errors), 2)
|
||||||
self.assertIs(errors[0][0], os.listdir)
|
self.assertIs(errors[0][0], os.scandir)
|
||||||
self.assertEqual(errors[0][1], filename)
|
self.assertEqual(errors[0][1], filename)
|
||||||
self.assertIsInstance(errors[0][2][1], NotADirectoryError)
|
self.assertIsInstance(errors[0][2][1], NotADirectoryError)
|
||||||
self.assertIn(errors[0][2][1].filename, possible_args)
|
self.assertIn(errors[0][2][1].filename, possible_args)
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
The shutil.rmtree() function has been sped up to 20--40%. This was done
|
||||||
|
using the os.scandir() function.
|
Loading…
Reference in New Issue