diff --git a/Doc/library/shutil.rst b/Doc/library/shutil.rst index 1527deb167f..a3b87ee61a3 100644 --- a/Doc/library/shutil.rst +++ b/Doc/library/shutil.rst @@ -51,7 +51,9 @@ Directory and files operations .. function:: copyfile(src, dst, *, follow_symlinks=True) Copy the contents (no metadata) of the file named *src* to a file named - *dst* and return *dst*. *src* and *dst* are path names given as strings. + *dst* and return *dst* in the most efficient way possible. + *src* and *dst* are path names given as strings. + *dst* must be the complete target file name; look at :func:`shutil.copy` for a copy that accepts a target directory path. If *src* and *dst* specify the same file, :exc:`SameFileError` is raised. @@ -74,6 +76,10 @@ Directory and files operations Raise :exc:`SameFileError` instead of :exc:`Error`. Since the former is a subclass of the latter, this change is backward compatible. + .. versionchanged:: 3.8 + Platform-specific fast-copy syscalls may be used internally in order to + copy the file more efficiently. See + :ref:`shutil-platform-dependent-efficient-copy-operations` section. .. exception:: SameFileError @@ -163,6 +169,11 @@ Directory and files operations Added *follow_symlinks* argument. Now returns path to the newly created file. + .. versionchanged:: 3.8 + Platform-specific fast-copy syscalls may be used internally in order to + copy the file more efficiently. See + :ref:`shutil-platform-dependent-efficient-copy-operations` section. + .. function:: copy2(src, dst, *, follow_symlinks=True) Identical to :func:`~shutil.copy` except that :func:`copy2` @@ -185,6 +196,11 @@ Directory and files operations file system attributes too (currently Linux only). Now returns path to the newly created file. + .. versionchanged:: 3.8 + Platform-specific fast-copy syscalls may be used internally in order to + copy the file more efficiently. See + :ref:`shutil-platform-dependent-efficient-copy-operations` section. + .. 
function:: ignore_patterns(\*patterns) This factory function creates a function that can be used as a callable for @@ -241,6 +257,10 @@ Directory and files operations Added the *ignore_dangling_symlinks* argument to silent dangling symlinks errors when *symlinks* is false. + .. versionchanged:: 3.8 + Platform-specific fast-copy syscalls may be used internally in order to + copy the file more efficiently. See + :ref:`shutil-platform-dependent-efficient-copy-operations` section. .. function:: rmtree(path, ignore_errors=False, onerror=None) @@ -314,6 +334,11 @@ Directory and files operations .. versionchanged:: 3.5 Added the *copy_function* keyword argument. + .. versionchanged:: 3.8 + Platform-specific fast-copy syscalls may be used internally in order to + copy the file more efficiently. See + :ref:`shutil-platform-dependent-efficient-copy-operations` section. + .. function:: disk_usage(path) Return disk usage statistics about the given path as a :term:`named tuple` @@ -370,6 +395,28 @@ Directory and files operations operation. For :func:`copytree`, the exception argument is a list of 3-tuples (*srcname*, *dstname*, *exception*). +.. _shutil-platform-dependent-efficient-copy-operations: + +Platform-dependent efficient copy operations +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Starting from Python 3.8 all functions involving a file copy (:func:`copyfile`, +:func:`copy`, :func:`copy2`, :func:`copytree`, and :func:`move`) may use +platform-specific "fast-copy" syscalls in order to copy the file more +efficiently (see :issue:`33671`). +"fast-copy" means that the copying operation occurs within the kernel, avoiding +the use of userspace buffers in Python as in "``outfd.write(infd.read())``". + +On OSX `fcopyfile`_ is used to copy the file content (not metadata). + +On Linux, Solaris and other POSIX platforms where :func:`os.sendfile` supports +copies between 2 regular file descriptors :func:`os.sendfile` is used. 
+ +If the fast-copy operation fails and no data was written in the destination +file then shutil will silently fall back on using the less efficient +:func:`copyfileobj` function internally. + +.. versionchanged:: 3.8 .. _shutil-copytree-example: @@ -654,6 +701,8 @@ Querying the size of the output terminal .. versionadded:: 3.3 +.. _`fcopyfile`: + http://www.manpagez.com/man/3/copyfile/ + .. _`Other Environment Variables`: http://pubs.opengroup.org/onlinepubs/7908799/xbd/envvar.html#tag_002_003 - diff --git a/Doc/whatsnew/3.8.rst b/Doc/whatsnew/3.8.rst index 52c76263264..9ae52c241f1 100644 --- a/Doc/whatsnew/3.8.rst +++ b/Doc/whatsnew/3.8.rst @@ -90,10 +90,27 @@ New Modules Improved Modules ================ - Optimizations ============= +* :func:`shutil.copyfile`, :func:`shutil.copy`, :func:`shutil.copy2`, + :func:`shutil.copytree` and :func:`shutil.move` use platform-specific + "fast-copy" syscalls on Linux, OSX and Solaris in order to copy the file more + efficiently. + "fast-copy" means that the copying operation occurs within the kernel, + avoiding the use of userspace buffers in Python as in + "``outfd.write(infd.read())``". + All other platforms not using such a technique will rely on a faster + :func:`shutil.copyfile` implementation using :func:`memoryview`, + :class:`bytearray` and + :meth:`BufferedIOBase.readinto() <io.BufferedIOBase.readinto>`. + Finally, :func:`shutil.copyfile` default buffer size on Windows was increased + from 16KB to 1MB. + The speedup for copying a 512MB file within the same partition is about +26% + on Linux, +50% on OSX and +38% on Windows. Also, far fewer CPU cycles are + consumed. + (Contributed by Giampaolo Rodola' in :issue:`33671`.) + * The default protocol in the :mod:`pickle` module is now Protocol 4, first introduced in Python 3.4. It offers better performance and smaller size compared to Protocol 3 available since Python 3.0. 
diff --git a/Lib/shutil.py b/Lib/shutil.py index 3c02776a406..09a5727ab46 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -10,6 +10,7 @@ import stat import fnmatch import collections import errno +import io try: import zlib @@ -42,6 +43,16 @@ try: except ImportError: getgrnam = None +posix = nt = None +if os.name == 'posix': + import posix +elif os.name == 'nt': + import nt + +COPY_BUFSIZE = 1024 * 1024 if os.name == 'nt' else 16 * 1024 +_HAS_SENDFILE = posix and hasattr(os, "sendfile") +_HAS_FCOPYFILE = posix and hasattr(posix, "_fcopyfile") # OSX + __all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2", "copytree", "move", "rmtree", "Error", "SpecialFileError", "ExecError", "make_archive", "get_archive_formats", @@ -72,14 +83,124 @@ class RegistryError(Exception): """Raised when a registry operation with the archiving and unpacking registries fails""" +class _GiveupOnFastCopy(Exception): + """Raised as a signal to fallback on using raw read()/write() + file copy when fast-copy functions fail to do so. + """ -def copyfileobj(fsrc, fdst, length=16*1024): +def _fastcopy_osx(fsrc, fdst, flags): + """Copy a regular file content or metadata by using high-performance + fcopyfile(3) syscall (OSX). + """ + try: + infd = fsrc.fileno() + outfd = fdst.fileno() + except Exception as err: + raise _GiveupOnFastCopy(err) # not a regular file + + try: + posix._fcopyfile(infd, outfd, flags) + except OSError as err: + err.filename = fsrc.name + err.filename2 = fdst.name + if err.errno in {errno.EINVAL, errno.ENOTSUP}: + raise _GiveupOnFastCopy(err) + else: + raise err from None + +def _fastcopy_sendfile(fsrc, fdst): + """Copy data from one regular mmap-like fd to another by using + high-performance sendfile(2) syscall. + This should work on Linux >= 2.6.33 and Solaris only. + """ + # Note: copyfileobj() is left alone in order to not introduce any + # unexpected breakage. 
Possible risks by using zero-copy calls + # in copyfileobj() are: + # - fdst cannot be open in "a"(ppend) mode + # - fsrc and fdst may be open in "t"(ext) mode + # - fsrc may be a BufferedReader (which hides unread data in a buffer), + # GzipFile (which decompresses data), HTTPResponse (which decodes + # chunks). + # - possibly others (e.g. encrypted fs/partition?) + global _HAS_SENDFILE + try: + infd = fsrc.fileno() + outfd = fdst.fileno() + except Exception as err: + raise _GiveupOnFastCopy(err) # not a regular file + + # Hopefully the whole file will be copied in a single call. + # sendfile() is called in a loop 'till EOF is reached (0 return) + # so a bufsize smaller or bigger than the actual file size + # should not make any difference, also in case the file content + # changes while being copied. + try: + blocksize = max(os.fstat(infd).st_size, 2 ** 23) # min 8MB + except Exception: + blocksize = 2 ** 27 # 128MB + + offset = 0 + while True: + try: + sent = os.sendfile(outfd, infd, offset, blocksize) + except OSError as err: + # ...in order to have a more informative exception. + err.filename = fsrc.name + err.filename2 = fdst.name + + if err.errno == errno.ENOTSOCK: + # sendfile() on this platform (probably Linux < 2.6.33) + # does not support copies between regular files (only + # sockets). + _HAS_SENDFILE = False + raise _GiveupOnFastCopy(err) + + if err.errno == errno.ENOSPC: # filesystem is full + raise err from None + + # Give up on first call and if no data was copied. + if offset == 0 and os.lseek(outfd, 0, os.SEEK_CUR) == 0: + raise _GiveupOnFastCopy(err) + + raise err + else: + if sent == 0: + break # EOF + offset += sent + +def _copybinfileobj(fsrc, fdst, length=COPY_BUFSIZE): + """Copy 2 regular file objects open in binary mode.""" + # Localize variable access to minimize overhead. 
+ fsrc_readinto = fsrc.readinto + fdst_write = fdst.write + with memoryview(bytearray(length)) as mv: + while True: + n = fsrc_readinto(mv) + if not n: + break + elif n < length: + fdst_write(mv[:n]) + else: + fdst_write(mv) + +def _is_binary_files_pair(fsrc, fdst): + return hasattr(fsrc, 'readinto') and \ + (isinstance(fsrc, io.BytesIO) or 'b' in getattr(fsrc, 'mode', '')) and \ + (isinstance(fdst, io.BytesIO) or 'b' in getattr(fdst, 'mode', '')) + +def copyfileobj(fsrc, fdst, length=COPY_BUFSIZE): """copy data from file-like object fsrc to file-like object fdst""" - while 1: - buf = fsrc.read(length) - if not buf: - break - fdst.write(buf) + if _is_binary_files_pair(fsrc, fdst): + _copybinfileobj(fsrc, fdst, length=length) + else: + # Localize variable access to minimize overhead. + fsrc_read = fsrc.read + fdst_write = fdst.write + while 1: + buf = fsrc_read(length) + if not buf: + break + fdst_write(buf) def _samefile(src, dst): # Macintosh, Unix. @@ -117,9 +238,23 @@ def copyfile(src, dst, *, follow_symlinks=True): if not follow_symlinks and os.path.islink(src): os.symlink(os.readlink(src), dst) else: - with open(src, 'rb') as fsrc: - with open(dst, 'wb') as fdst: - copyfileobj(fsrc, fdst) + with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst: + if _HAS_SENDFILE: + try: + _fastcopy_sendfile(fsrc, fdst) + return dst + except _GiveupOnFastCopy: + pass + + if _HAS_FCOPYFILE: + try: + _fastcopy_osx(fsrc, fdst, posix._COPYFILE_DATA) + return dst + except _GiveupOnFastCopy: + pass + + _copybinfileobj(fsrc, fdst) + return dst def copymode(src, dst, *, follow_symlinks=True): @@ -244,13 +379,12 @@ def copy(src, dst, *, follow_symlinks=True): def copy2(src, dst, *, follow_symlinks=True): """Copy data and all stat info ("cp -p src dst"). Return the file's - destination." + destination. The destination may be a directory. If follow_symlinks is false, symlinks won't be followed. This resembles GNU's "cp -P src dst". 
- """ if os.path.isdir(dst): dst = os.path.join(dst, os.path.basename(src)) @@ -1015,7 +1149,6 @@ if hasattr(os, 'statvfs'): elif os.name == 'nt': - import nt __all__.append('disk_usage') _ntuple_diskusage = collections.namedtuple('usage', 'total used free') diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py index 2cb2f14643e..8d519944191 100644 --- a/Lib/test/test_shutil.py +++ b/Lib/test/test_shutil.py @@ -12,20 +12,28 @@ import errno import functools import pathlib import subprocess +import random +import string +import contextlib +import io from shutil import (make_archive, register_archive_format, unregister_archive_format, get_archive_formats, Error, unpack_archive, register_unpack_format, RegistryError, unregister_unpack_format, get_unpack_formats, - SameFileError) + SameFileError, _GiveupOnFastCopy) import tarfile import zipfile +try: + import posix +except ImportError: + posix = None from test import support from test.support import TESTFN, FakePath TESTFN2 = TESTFN + "2" - +OSX = sys.platform.startswith("darwin") try: import grp import pwd @@ -60,6 +68,24 @@ def write_file(path, content, binary=False): with open(path, 'wb' if binary else 'w') as fp: fp.write(content) +def write_test_file(path, size): + """Create a test file with an arbitrary size and random text content.""" + def chunks(total, step): + assert total >= step + while total > step: + yield step + total -= step + if total: + yield total + + bufsize = min(size, 8192) + chunk = b"".join([random.choice(string.ascii_letters).encode() + for i in range(bufsize)]) + with open(path, 'wb') as f: + for csize in chunks(size, bufsize): + f.write(chunk) + assert os.path.getsize(path) == size + def read_file(path, binary=False): """Return contents from a file located at *path*. 
@@ -84,6 +110,37 @@ def rlistdir(path): res.append(name) return res +def supports_file2file_sendfile(): + # ...apparently Linux and Solaris are the only ones + if not hasattr(os, "sendfile"): + return False + srcname = None + dstname = None + try: + with tempfile.NamedTemporaryFile("wb", delete=False) as f: + srcname = f.name + f.write(b"0123456789") + + with open(srcname, "rb") as src: + with tempfile.NamedTemporaryFile("wb", delete=False) as dst: + dstname = dst.name + infd = src.fileno() + outfd = dst.fileno() + try: + os.sendfile(outfd, infd, 0, 2) + except OSError: + return False + else: + return True + finally: + if srcname is not None: + support.unlink(srcname) + if dstname is not None: + support.unlink(dstname) + + +SUPPORTS_SENDFILE = supports_file2file_sendfile() + class TestShutil(unittest.TestCase): @@ -1401,6 +1458,8 @@ class TestShutil(unittest.TestCase): self.assertRaises(SameFileError, shutil.copyfile, src_file, src_file) # But Error should work too, to stay backward compatible. self.assertRaises(Error, shutil.copyfile, src_file, src_file) + # Make sure file is not corrupted. + self.assertEqual(read_file(src_file), 'foo') def test_copytree_return_value(self): # copytree returns its destination path. 
@@ -1749,6 +1808,7 @@ class TestCopyFile(unittest.TestCase): self.assertRaises(OSError, shutil.copyfile, 'srcfile', 'destfile') + @unittest.skipIf(OSX, "skipped on OSX") def test_w_dest_open_fails(self): srcfile = self.Faux() @@ -1768,6 +1828,7 @@ class TestCopyFile(unittest.TestCase): self.assertEqual(srcfile._exited_with[1].args, ('Cannot open "destfile"',)) + @unittest.skipIf(OSX, "skipped on OSX") def test_w_dest_close_fails(self): srcfile = self.Faux() @@ -1790,6 +1851,7 @@ class TestCopyFile(unittest.TestCase): self.assertEqual(srcfile._exited_with[1].args, ('Cannot close',)) + @unittest.skipIf(OSX, "skipped on OSX") def test_w_source_close_fails(self): srcfile = self.Faux(True) @@ -1829,6 +1891,234 @@ class TestCopyFile(unittest.TestCase): finally: os.rmdir(dst_dir) + +class _ZeroCopyFileTest(object): + """Tests common to all zero-copy APIs.""" + FILESIZE = (10 * 1024 * 1024) # 10 MiB + FILEDATA = b"" + PATCHPOINT = "" + + @classmethod + def setUpClass(cls): + write_test_file(TESTFN, cls.FILESIZE) + with open(TESTFN, 'rb') as f: + cls.FILEDATA = f.read() + assert len(cls.FILEDATA) == cls.FILESIZE + + @classmethod + def tearDownClass(cls): + support.unlink(TESTFN) + + def tearDown(self): + support.unlink(TESTFN2) + + @contextlib.contextmanager + def get_files(self): + with open(TESTFN, "rb") as src: + with open(TESTFN2, "wb") as dst: + yield (src, dst) + + def zerocopy_fun(self, *args, **kwargs): + raise NotImplementedError("must be implemented in subclass") + + def reset(self): + self.tearDown() + self.tearDownClass() + self.setUpClass() + self.setUp() + + # --- + + def test_regular_copy(self): + with self.get_files() as (src, dst): + self.zerocopy_fun(src, dst) + self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA) + # Make sure the fallback function is not called. 
+ with self.get_files() as (src, dst): + with unittest.mock.patch('shutil.copyfileobj') as m: + shutil.copyfile(TESTFN, TESTFN2) + assert not m.called + + def test_same_file(self): + self.addCleanup(self.reset) + with self.get_files() as (src, dst): + with self.assertRaises(Exception): + self.zerocopy_fun(src, src) + # Make sure src file is not corrupted. + self.assertEqual(read_file(TESTFN, binary=True), self.FILEDATA) + + def test_non_existent_src(self): + name = tempfile.mktemp() + with self.assertRaises(FileNotFoundError) as cm: + shutil.copyfile(name, "new") + self.assertEqual(cm.exception.filename, name) + + def test_empty_file(self): + srcname = TESTFN + 'src' + dstname = TESTFN + 'dst' + self.addCleanup(lambda: support.unlink(srcname)) + self.addCleanup(lambda: support.unlink(dstname)) + with open(srcname, "wb"): + pass + + with open(srcname, "rb") as src: + with open(dstname, "wb") as dst: + self.zerocopy_fun(src, dst) + + self.assertEqual(read_file(dstname, binary=True), b"") + + def test_unhandled_exception(self): + with unittest.mock.patch(self.PATCHPOINT, + side_effect=ZeroDivisionError): + self.assertRaises(ZeroDivisionError, + shutil.copyfile, TESTFN, TESTFN2) + + def test_exception_on_first_call(self): + # Emulate a case where the first call to the zero-copy + # function raises an exception in which case the function is + # supposed to give up immediately. + with unittest.mock.patch(self.PATCHPOINT, + side_effect=OSError(errno.EINVAL, "yo")): + with self.get_files() as (src, dst): + with self.assertRaises(_GiveupOnFastCopy): + self.zerocopy_fun(src, dst) + + def test_filesystem_full(self): + # Emulate a case where filesystem is full and sendfile() fails + # on first call. 
+ with unittest.mock.patch(self.PATCHPOINT, + side_effect=OSError(errno.ENOSPC, "yo")): + with self.get_files() as (src, dst): + self.assertRaises(OSError, self.zerocopy_fun, src, dst) + + +@unittest.skipIf(not SUPPORTS_SENDFILE, 'os.sendfile() not supported') +class TestZeroCopySendfile(_ZeroCopyFileTest, unittest.TestCase): + PATCHPOINT = "os.sendfile" + + def zerocopy_fun(self, fsrc, fdst): + return shutil._fastcopy_sendfile(fsrc, fdst) + + def test_non_regular_file_src(self): + with io.BytesIO(self.FILEDATA) as src: + with open(TESTFN2, "wb") as dst: + with self.assertRaises(_GiveupOnFastCopy): + self.zerocopy_fun(src, dst) + shutil.copyfileobj(src, dst) + + self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA) + + def test_non_regular_file_dst(self): + with open(TESTFN, "rb") as src: + with io.BytesIO() as dst: + with self.assertRaises(_GiveupOnFastCopy): + self.zerocopy_fun(src, dst) + shutil.copyfileobj(src, dst) + dst.seek(0) + self.assertEqual(dst.read(), self.FILEDATA) + + def test_exception_on_second_call(self): + def sendfile(*args, **kwargs): + if not flag: + flag.append(None) + return orig_sendfile(*args, **kwargs) + else: + raise OSError(errno.EBADF, "yo") + + flag = [] + orig_sendfile = os.sendfile + with unittest.mock.patch('os.sendfile', create=True, + side_effect=sendfile): + with self.get_files() as (src, dst): + with self.assertRaises(OSError) as cm: + shutil._fastcopy_sendfile(src, dst) + assert flag + self.assertEqual(cm.exception.errno, errno.EBADF) + + def test_cant_get_size(self): + # Emulate a case where src file size cannot be determined. + # Internally bufsize will be set to a small value and + # sendfile() will be called repeatedly. 
+ with unittest.mock.patch('os.fstat', side_effect=OSError) as m: + with self.get_files() as (src, dst): + shutil._fastcopy_sendfile(src, dst) + assert m.called + self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA) + + def test_small_chunks(self): + # Force internal file size detection to be smaller than the + # actual file size. We want to force sendfile() to be called + # multiple times, also in order to emulate a src fd which gets + # bigger while it is being copied. + mock = unittest.mock.Mock() + mock.st_size = 65536 + 1 + with unittest.mock.patch('os.fstat', return_value=mock) as m: + with self.get_files() as (src, dst): + shutil._fastcopy_sendfile(src, dst) + assert m.called + self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA) + + def test_big_chunk(self): + # Force internal file size detection to be +100MB bigger than + # the actual file size. Make sure sendfile() does not rely on + # file size value except for (maybe) a better throughput / + # performance. + mock = unittest.mock.Mock() + mock.st_size = self.FILESIZE + (100 * 1024 * 1024) + with unittest.mock.patch('os.fstat', return_value=mock) as m: + with self.get_files() as (src, dst): + shutil._fastcopy_sendfile(src, dst) + assert m.called + self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA) + + def test_blocksize_arg(self): + with unittest.mock.patch('os.sendfile', + side_effect=ZeroDivisionError) as m: + self.assertRaises(ZeroDivisionError, + shutil.copyfile, TESTFN, TESTFN2) + blocksize = m.call_args[0][3] + # Make sure file size and the block size arg passed to + # sendfile() are the same. + self.assertEqual(blocksize, os.path.getsize(TESTFN)) + # ...unless we're dealing with a small file. 
+ support.unlink(TESTFN2) + write_file(TESTFN2, b"hello", binary=True) + self.addCleanup(support.unlink, TESTFN2 + '3') + self.assertRaises(ZeroDivisionError, + shutil.copyfile, TESTFN2, TESTFN2 + '3') + blocksize = m.call_args[0][3] + self.assertEqual(blocksize, 2 ** 23) + + def test_file2file_not_supported(self): + # Emulate a case where sendfile() only support file->socket + # fds. In such a case copyfile() is supposed to skip the + # fast-copy attempt from then on. + assert shutil._HAS_SENDFILE + try: + with unittest.mock.patch( + self.PATCHPOINT, + side_effect=OSError(errno.ENOTSOCK, "yo")) as m: + with self.get_files() as (src, dst): + with self.assertRaises(_GiveupOnFastCopy): + shutil._fastcopy_sendfile(src, dst) + assert m.called + assert not shutil._HAS_SENDFILE + + with unittest.mock.patch(self.PATCHPOINT) as m: + shutil.copyfile(TESTFN, TESTFN2) + assert not m.called + finally: + shutil._HAS_SENDFILE = True + + +@unittest.skipIf(not OSX, 'OSX only') +class TestZeroCopyOSX(_ZeroCopyFileTest, unittest.TestCase): + PATCHPOINT = "posix._fcopyfile" + + def zerocopy_fun(self, src, dst): + return shutil._fastcopy_osx(src, dst, posix._COPYFILE_DATA) + + class TermsizeTests(unittest.TestCase): def test_does_not_crash(self): """Check if get_terminal_size() returns a meaningful value. diff --git a/Misc/NEWS.d/next/Library/2018-05-28-23-25-17.bpo-33671.GIdKKi.rst b/Misc/NEWS.d/next/Library/2018-05-28-23-25-17.bpo-33671.GIdKKi.rst new file mode 100644 index 00000000000..5fd7e1f1e21 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2018-05-28-23-25-17.bpo-33671.GIdKKi.rst @@ -0,0 +1,11 @@ +:func:`shutil.copyfile`, :func:`shutil.copy`, :func:`shutil.copy2`, +:func:`shutil.copytree` and :func:`shutil.move` use platform-specific +fast-copy syscalls on Linux, Solaris and OSX in order to copy the file +more efficiently. 
All other platforms not using such a technique will rely on a +faster :func:`shutil.copyfile` implementation using :func:`memoryview`, +:class:`bytearray` and +:meth:`BufferedIOBase.readinto() <io.BufferedIOBase.readinto>`. +Finally, :func:`shutil.copyfile` default buffer size on Windows was increased +from 16KB to 1MB. The speedup for copying a 512MB file is about +26% on Linux, ++50% on OSX and +38% on Windows. Also, far fewer CPU cycles are consumed. +(Contributed by Giampaolo Rodola' in :issue:`33671`.) diff --git a/Modules/_winapi.c b/Modules/_winapi.c index c596cba3cbc..75d1f0678ef 100644 --- a/Modules/_winapi.c +++ b/Modules/_winapi.c @@ -163,6 +163,7 @@ create_converter('LPSECURITY_ATTRIBUTES', '" F_POINTER "') create_converter('BOOL', 'i') # F_BOOL used previously (always 'i') create_converter('DWORD', 'k') # F_DWORD is always "k" (which is much shorter) create_converter('LPCTSTR', 's') +create_converter('LPCWSTR', 'u') create_converter('LPWSTR', 'u') create_converter('UINT', 'I') # F_UINT used previously (always 'I') @@ -186,7 +187,7 @@ class DWORD_return_converter(CReturnConverter): data.return_conversion.append( 'return_value = Py_BuildValue("k", _return_value);\n') [python start generated code]*/ -/*[python end generated code: output=da39a3ee5e6b4b0d input=4527052fe06e5823]*/ +/*[python end generated code: output=da39a3ee5e6b4b0d input=27456f8555228b62]*/ #include "clinic/_winapi.c.h" diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h index e4bbd082450..c41d1314037 100644 --- a/Modules/clinic/posixmodule.c.h +++ b/Modules/clinic/posixmodule.c.h @@ -3853,6 +3853,40 @@ exit: return return_value; } +#if defined(__APPLE__) + +PyDoc_STRVAR(os__fcopyfile__doc__, +"_fcopyfile($module, infd, outfd, flags, /)\n" +"--\n" +"\n" +"Efficiently copy content or metadata of 2 regular file descriptors (OSX)."); + +#define OS__FCOPYFILE_METHODDEF \ + {"_fcopyfile", (PyCFunction)os__fcopyfile, METH_FASTCALL, os__fcopyfile__doc__}, + +static PyObject * 
+os__fcopyfile_impl(PyObject *module, int infd, int outfd, int flags); + +static PyObject * +os__fcopyfile(PyObject *module, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + int infd; + int outfd; + int flags; + + if (!_PyArg_ParseStack(args, nargs, "iii:_fcopyfile", + &infd, &outfd, &flags)) { + goto exit; + } + return_value = os__fcopyfile_impl(module, infd, outfd, flags); + +exit: + return return_value; +} + +#endif /* defined(__APPLE__) */ + PyDoc_STRVAR(os_fstat__doc__, "fstat($module, /, fd)\n" "--\n" @@ -6414,6 +6448,10 @@ exit: #define OS_PREADV_METHODDEF #endif /* !defined(OS_PREADV_METHODDEF) */ +#ifndef OS__FCOPYFILE_METHODDEF + #define OS__FCOPYFILE_METHODDEF +#endif /* !defined(OS__FCOPYFILE_METHODDEF) */ + #ifndef OS_PIPE_METHODDEF #define OS_PIPE_METHODDEF #endif /* !defined(OS_PIPE_METHODDEF) */ @@ -6589,4 +6627,4 @@ exit: #ifndef OS_GETRANDOM_METHODDEF #define OS_GETRANDOM_METHODDEF #endif /* !defined(OS_GETRANDOM_METHODDEF) */ -/*[clinic end generated code: output=8d3d9dddf254c3c2 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=b5d1ec71bc6f0651 input=a9049054013a1b77]*/ diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 247af1b6ce9..24d8be66665 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -97,6 +97,10 @@ corresponding Unix manual entries for more information on calls."); #include #endif +#if defined(__APPLE__) +#include <copyfile.h> +#endif + #ifdef HAVE_SCHED_H #include <sched.h> #endif @@ -8742,6 +8746,34 @@ done: #endif /* HAVE_SENDFILE */ +#if defined(__APPLE__) +/*[clinic input] +os._fcopyfile + + infd: int + outfd: int + flags: int + / + +Efficiently copy content or metadata of 2 regular file descriptors (OSX). 
+[clinic start generated code]*/ + +static PyObject * +os__fcopyfile_impl(PyObject *module, int infd, int outfd, int flags) +/*[clinic end generated code: output=8e8885c721ec38e3 input=aeb9456804eec879]*/ +{ + int ret; + + Py_BEGIN_ALLOW_THREADS + ret = fcopyfile(infd, outfd, NULL, flags); + Py_END_ALLOW_THREADS + if (ret < 0) + return posix_error(); + Py_RETURN_NONE; +} +#endif + + /*[clinic input] os.fstat @@ -12918,6 +12950,7 @@ static PyMethodDef posix_methods[] = { OS_UTIME_METHODDEF OS_TIMES_METHODDEF OS__EXIT_METHODDEF + OS__FCOPYFILE_METHODDEF OS_EXECV_METHODDEF OS_EXECVE_METHODDEF OS_SPAWNV_METHODDEF @@ -13537,6 +13570,10 @@ all_ins(PyObject *m) if (PyModule_AddIntMacro(m, GRND_NONBLOCK)) return -1; #endif +#if defined(__APPLE__) + if (PyModule_AddIntConstant(m, "_COPYFILE_DATA", COPYFILE_DATA)) return -1; +#endif + return 0; }