diff --git a/Doc/whatsnew/3.8.rst b/Doc/whatsnew/3.8.rst index 91e0d5bb7b3..e5e6d4a5994 100644 --- a/Doc/whatsnew/3.8.rst +++ b/Doc/whatsnew/3.8.rst @@ -277,6 +277,14 @@ Optimizations See :ref:`shutil-platform-dependent-efficient-copy-operations` section. (Contributed by Giampaolo Rodola' in :issue:`25427`.) +* :func:`shutil.copytree` uses the :func:`os.scandir` function and all copy + functions depending on it use cached :func:`os.stat` values. The speedup + for copying a directory with 8000 files is around +9% on Linux, +20% on + Windows and +30% on a Windows SMB share. Also the number of :func:`os.stat` + syscalls is reduced by 38%, making :func:`shutil.copytree` faster, especially + on network filesystems. (Contributed by Giampaolo Rodola' in :issue:`33695`.) + + * The default protocol in the :mod:`pickle` module is now Protocol 4, first introduced in Python 3.4. It offers better performance and smaller size compared to Protocol 3 available since Python 3.0. diff --git a/Lib/shutil.py b/Lib/shutil.py index b7a7df3a51f..74348ba62ef 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -200,6 +200,12 @@ def copyfileobj(fsrc, fdst, length=COPY_BUFSIZE): def _samefile(src, dst): # Macintosh, Unix. + if isinstance(src, os.DirEntry) and hasattr(os.path, 'samestat'): + try: + return os.path.samestat(src.stat(), os.stat(dst)) + except OSError: + return False + if hasattr(os.path, 'samefile'): try: return os.path.samefile(src, dst) @@ -210,6 +216,12 @@ def _samefile(src, dst): return (os.path.normcase(os.path.abspath(src)) == os.path.normcase(os.path.abspath(dst))) +def _stat(fn): + return fn.stat() if isinstance(fn, os.DirEntry) else os.stat(fn) + +def _islink(fn): + return fn.is_symlink() if isinstance(fn, os.DirEntry) else os.path.islink(fn) + def copyfile(src, dst, *, follow_symlinks=True): """Copy data from src to dst in the most efficient way possible. 
@@ -223,18 +235,19 @@ def copyfile(src, dst, *, follow_symlinks=True): file_size = 0 for i, fn in enumerate([src, dst]): try: - st = os.stat(fn) + st = _stat(fn) except OSError: # File most likely does not exist pass else: # XXX What about other special files? (sockets, devices...) if stat.S_ISFIFO(st.st_mode): + fn = fn.path if isinstance(fn, os.DirEntry) else fn raise SpecialFileError("`%s` is a named pipe" % fn) if _WINDOWS and i == 0: file_size = st.st_size - if not follow_symlinks and os.path.islink(src): + if not follow_symlinks and _islink(src): os.symlink(os.readlink(src), dst) else: with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst: @@ -270,13 +283,13 @@ def copymode(src, dst, *, follow_symlinks=True): (e.g. Linux) this method does nothing. """ - if not follow_symlinks and os.path.islink(src) and os.path.islink(dst): + if not follow_symlinks and _islink(src) and os.path.islink(dst): if hasattr(os, 'lchmod'): stat_func, chmod_func = os.lstat, os.lchmod else: return elif hasattr(os, 'chmod'): - stat_func, chmod_func = os.stat, os.chmod + stat_func, chmod_func = _stat, os.chmod else: return @@ -325,7 +338,7 @@ def copystat(src, dst, *, follow_symlinks=True): pass # follow symlinks (aka don't not follow symlinks) - follow = follow_symlinks or not (os.path.islink(src) and os.path.islink(dst)) + follow = follow_symlinks or not (_islink(src) and os.path.islink(dst)) if follow: # use the real function if it exists def lookup(name): @@ -339,7 +352,10 @@ def copystat(src, dst, *, follow_symlinks=True): return fn return _nop - st = lookup("stat")(src, follow_symlinks=follow) + if isinstance(src, os.DirEntry): + st = src.stat(follow_symlinks=follow) + else: + st = lookup("stat")(src, follow_symlinks=follow) mode = stat.S_IMODE(st.st_mode) lookup("utime")(dst, ns=(st.st_atime_ns, st.st_mtime_ns), follow_symlinks=follow) @@ -415,6 +431,63 @@ def ignore_patterns(*patterns): return set(ignored_names) return _ignore_patterns +def _copytree(entries, src, dst, symlinks, 
ignore, copy_function, + ignore_dangling_symlinks): + if ignore is not None: + ignored_names = ignore(os.fspath(src), os.listdir(src)) + else: + ignored_names = set() + + os.makedirs(dst) + errors = [] + use_srcentry = copy_function is copy2 or copy_function is copy + + for srcentry in entries: + if srcentry.name in ignored_names: + continue + srcname = os.path.join(src, srcentry.name) + dstname = os.path.join(dst, srcentry.name) + srcobj = srcentry if use_srcentry else srcname + try: + if srcentry.is_symlink(): + linkto = os.readlink(srcname) + if symlinks: + # We can't just leave it to `copy_function` because legacy + # code with a custom `copy_function` may rely on copytree + # doing the right thing. + os.symlink(linkto, dstname) + copystat(srcobj, dstname, follow_symlinks=not symlinks) + else: + # ignore dangling symlink if the flag is on + if not os.path.exists(linkto) and ignore_dangling_symlinks: + continue + # otherwise let the copy occur; copy2 will raise an error + if srcentry.is_dir(): + copytree(srcobj, dstname, symlinks, ignore, + copy_function) + else: + copy_function(srcobj, dstname) + elif srcentry.is_dir(): + copytree(srcobj, dstname, symlinks, ignore, copy_function) + else: + # Will raise a SpecialFileError for unsupported file types + copy_function(srcobj, dstname) + # catch the Error from the recursive copytree so that we can + # continue with other files + except Error as err: + errors.extend(err.args[0]) + except OSError as why: + errors.append((srcname, dstname, str(why))) + try: + copystat(src, dst) + except OSError as why: + # Copying file access times may fail on Windows + if getattr(why, 'winerror', None) is None: + errors.append((src, dst, str(why))) + if errors: + raise Error(errors) + return dst + def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2, ignore_dangling_symlinks=False): """Recursively copy a directory tree. 
@@ -451,58 +524,10 @@ def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2, function that supports the same signature (like copy()) can be used. """ - names = os.listdir(src) - if ignore is not None: - ignored_names = ignore(src, names) - else: - ignored_names = set() - - os.makedirs(dst) - errors = [] - for name in names: - if name in ignored_names: - continue - srcname = os.path.join(src, name) - dstname = os.path.join(dst, name) - try: - if os.path.islink(srcname): - linkto = os.readlink(srcname) - if symlinks: - # We can't just leave it to `copy_function` because legacy - # code with a custom `copy_function` may rely on copytree - # doing the right thing. - os.symlink(linkto, dstname) - copystat(srcname, dstname, follow_symlinks=not symlinks) - else: - # ignore dangling symlink if the flag is on - if not os.path.exists(linkto) and ignore_dangling_symlinks: - continue - # otherwise let the copy occurs. copy2 will raise an error - if os.path.isdir(srcname): - copytree(srcname, dstname, symlinks, ignore, - copy_function) - else: - copy_function(srcname, dstname) - elif os.path.isdir(srcname): - copytree(srcname, dstname, symlinks, ignore, copy_function) - else: - # Will raise a SpecialFileError for unsupported file types - copy_function(srcname, dstname) - # catch the Error from the recursive copytree so that we can - # continue with other files - except Error as err: - errors.extend(err.args[0]) - except OSError as why: - errors.append((srcname, dstname, str(why))) - try: - copystat(src, dst) - except OSError as why: - # Copying file access times may fail on Windows - if getattr(why, 'winerror', None) is None: - errors.append((src, dst, str(why))) - if errors: - raise Error(errors) - return dst + with os.scandir(src) as entries: + return _copytree(entries=entries, src=src, dst=dst, symlinks=symlinks, + ignore=ignore, copy_function=copy_function, + ignore_dangling_symlinks=ignore_dangling_symlinks) # version vulnerable to race conditions def 
_rmtree_unsafe(path, onerror): diff --git a/Misc/NEWS.d/next/Library/2018-06-23-12-47-37.bpo-33695.seRTxh.rst b/Misc/NEWS.d/next/Library/2018-06-23-12-47-37.bpo-33695.seRTxh.rst new file mode 100644 index 00000000000..21950453b0a --- /dev/null +++ b/Misc/NEWS.d/next/Library/2018-06-23-12-47-37.bpo-33695.seRTxh.rst @@ -0,0 +1,7 @@ +:func:`shutil.copytree` uses the :func:`os.scandir` function and all copy +functions depending on it use cached :func:`os.stat` values. The speedup +for copying a directory with 8000 files is around +9% on Linux, +20% on +Windows and +30% on a Windows SMB share. Also the number of :func:`os.stat` +syscalls is reduced by 38%, making :func:`shutil.copytree` faster, especially +on network filesystems. +(Contributed by Giampaolo Rodola' in :issue:`33695`.)