mirror of https://github.com/python/cpython
bpo-33671 / shutil.copyfile: use memoryview() with dynamic size on Windows (#7681)
bpo-33671 * use memoryview() with size == file size on Windows, see https://github.com/python/cpython/pull/7160#discussion_r195405230 * release intermediate (sliced) memoryview immediately * replace "OSX" occurrences with "macOS" * add some unittests for copyfileobj()
This commit is contained in:
parent
936f03e7fa
commit
c7f02a9659
|
@ -407,11 +407,15 @@ efficiently (see :issue:`33671`).
|
|||
"fast-copy" means that the copying operation occurs within the kernel, avoiding
|
||||
the use of userspace buffers in Python as in "``outfd.write(infd.read())``".
|
||||
|
||||
On OSX `fcopyfile`_ is used to copy the file content (not metadata).
|
||||
On macOS `fcopyfile`_ is used to copy the file content (not metadata).
|
||||
|
||||
On Linux, Solaris and other POSIX platforms where :func:`os.sendfile` supports
|
||||
copies between 2 regular file descriptors :func:`os.sendfile` is used.
|
||||
|
||||
On Windows :func:`shutil.copyfile` uses a bigger default buffer size (1 MiB
|
||||
instead of 16 KiB) and a :func:`memoryview`-based variant of
|
||||
:func:`shutil.copyfileobj` is used.
|
||||
|
||||
If the fast-copy operation fails and no data was written in the destination
|
||||
file then shutil will silently fallback on using less efficient
|
||||
:func:`copyfileobj` function internally.
|
||||
|
|
|
@ -95,20 +95,18 @@ Optimizations
|
|||
|
||||
* :func:`shutil.copyfile`, :func:`shutil.copy`, :func:`shutil.copy2`,
|
||||
:func:`shutil.copytree` and :func:`shutil.move` use platform-specific
|
||||
"fast-copy" syscalls on Linux, OSX and Solaris in order to copy the file more
|
||||
efficiently.
|
||||
"fast-copy" syscalls on Linux, macOS and Solaris in order to copy the file
|
||||
more efficiently.
|
||||
"fast-copy" means that the copying operation occurs within the kernel,
|
||||
avoiding the use of userspace buffers in Python as in
|
||||
"``outfd.write(infd.read())``".
|
||||
All other platforms not using such technique will rely on a faster
|
||||
:func:`shutil.copyfile` implementation using :func:`memoryview`,
|
||||
:class:`bytearray` and
|
||||
:meth:`BufferedIOBase.readinto() <io.BufferedIOBase.readinto>`.
|
||||
Finally, :func:`shutil.copyfile` default buffer size on Windows was increased
|
||||
from 16KB to 1MB.
|
||||
The speedup for copying a 512MB file within the same partition is about +26%
|
||||
on Linux, +50% on OSX and +38% on Windows. Also, much less CPU cycles are
|
||||
consumed.
|
||||
On Windows :func:`shutil.copyfile` uses a bigger default buffer size (1 MiB
|
||||
instead of 16 KiB) and a :func:`memoryview`-based variant of
|
||||
:func:`shutil.copyfileobj` is used.
|
||||
The speedup for copying a 512 MiB file within the same partition is about
|
||||
+26% on Linux, +50% on macOS and +40% on Windows. Also, much less CPU cycles
|
||||
are consumed.
|
||||
See :ref:`shutil-platform-dependent-efficient-copy-operations` section.
|
||||
(Contributed by Giampaolo Rodola' in :issue:`25427`.)
|
||||
|
||||
* The default protocol in the :mod:`pickle` module is now Protocol 4,
|
||||
|
@ -179,6 +177,14 @@ Changes in the Python API
|
|||
* The :class:`cProfile.Profile` class can now be used as a context
|
||||
manager. (Contributed by Scott Sanderson in :issue:`29235`.)
|
||||
|
||||
* :func:`shutil.copyfile`, :func:`shutil.copy`, :func:`shutil.copy2`,
|
||||
:func:`shutil.copytree` and :func:`shutil.move` use platform-specific
|
||||
"fast-copy" syscalls (see
|
||||
:ref:`shutil-platform-dependent-efficient-copy-operations` section).
|
||||
|
||||
* :func:`shutil.copyfile` default buffer size on Windows was changed from
|
||||
16 KiB to 1 MiB.
|
||||
|
||||
CPython bytecode changes
|
||||
------------------------
|
||||
|
||||
|
|
|
@ -43,15 +43,16 @@ try:
|
|||
except ImportError:
|
||||
getgrnam = None
|
||||
|
||||
_WINDOWS = os.name == 'nt'
|
||||
posix = nt = None
|
||||
if os.name == 'posix':
|
||||
import posix
|
||||
elif os.name == 'nt':
|
||||
elif _WINDOWS:
|
||||
import nt
|
||||
|
||||
COPY_BUFSIZE = 1024 * 1024 if os.name == 'nt' else 16 * 1024
|
||||
COPY_BUFSIZE = 1024 * 1024 if _WINDOWS else 16 * 1024
|
||||
_HAS_SENDFILE = posix and hasattr(os, "sendfile")
|
||||
_HAS_FCOPYFILE = posix and hasattr(posix, "_fcopyfile") # OSX
|
||||
_HAS_FCOPYFILE = posix and hasattr(posix, "_fcopyfile") # macOS
|
||||
|
||||
__all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2",
|
||||
"copytree", "move", "rmtree", "Error", "SpecialFileError",
|
||||
|
@ -88,9 +89,9 @@ class _GiveupOnFastCopy(Exception):
|
|||
file copy when fast-copy functions fail to do so.
|
||||
"""
|
||||
|
||||
def _fastcopy_osx(fsrc, fdst, flags):
|
||||
def _fastcopy_fcopyfile(fsrc, fdst, flags):
|
||||
"""Copy a regular file content or metadata by using high-performance
|
||||
fcopyfile(3) syscall (OSX).
|
||||
fcopyfile(3) syscall (macOS).
|
||||
"""
|
||||
try:
|
||||
infd = fsrc.fileno()
|
||||
|
@ -168,8 +169,11 @@ def _fastcopy_sendfile(fsrc, fdst):
|
|||
break # EOF
|
||||
offset += sent
|
||||
|
||||
def _copybinfileobj(fsrc, fdst, length=COPY_BUFSIZE):
|
||||
"""Copy 2 regular file objects open in binary mode."""
|
||||
def _copyfileobj_readinto(fsrc, fdst, length=COPY_BUFSIZE):
|
||||
"""readinto()/memoryview() based variant of copyfileobj().
|
||||
*fsrc* must support readinto() method and both files must be
|
||||
open in binary mode.
|
||||
"""
|
||||
# Localize variable access to minimize overhead.
|
||||
fsrc_readinto = fsrc.readinto
|
||||
fdst_write = fdst.write
|
||||
|
@ -179,28 +183,21 @@ def _copybinfileobj(fsrc, fdst, length=COPY_BUFSIZE):
|
|||
if not n:
|
||||
break
|
||||
elif n < length:
|
||||
fdst_write(mv[:n])
|
||||
with mv[:n] as smv:
|
||||
fdst.write(smv)
|
||||
else:
|
||||
fdst_write(mv)
|
||||
|
||||
def _is_binary_files_pair(fsrc, fdst):
|
||||
return hasattr(fsrc, 'readinto') and \
|
||||
isinstance(fsrc, io.BytesIO) or 'b' in getattr(fsrc, 'mode', '') and \
|
||||
isinstance(fdst, io.BytesIO) or 'b' in getattr(fdst, 'mode', '')
|
||||
|
||||
def copyfileobj(fsrc, fdst, length=COPY_BUFSIZE):
|
||||
"""copy data from file-like object fsrc to file-like object fdst"""
|
||||
if _is_binary_files_pair(fsrc, fdst):
|
||||
_copybinfileobj(fsrc, fdst, length=length)
|
||||
else:
|
||||
# Localize variable access to minimize overhead.
|
||||
fsrc_read = fsrc.read
|
||||
fdst_write = fdst.write
|
||||
while 1:
|
||||
buf = fsrc_read(length)
|
||||
if not buf:
|
||||
break
|
||||
fdst_write(buf)
|
||||
# Localize variable access to minimize overhead.
|
||||
fsrc_read = fsrc.read
|
||||
fdst_write = fdst.write
|
||||
while True:
|
||||
buf = fsrc_read(length)
|
||||
if not buf:
|
||||
break
|
||||
fdst_write(buf)
|
||||
|
||||
def _samefile(src, dst):
|
||||
# Macintosh, Unix.
|
||||
|
@ -215,7 +212,7 @@ def _samefile(src, dst):
|
|||
os.path.normcase(os.path.abspath(dst)))
|
||||
|
||||
def copyfile(src, dst, *, follow_symlinks=True):
|
||||
"""Copy data from src to dst.
|
||||
"""Copy data from src to dst in the most efficient way possible.
|
||||
|
||||
If follow_symlinks is not set and src is a symbolic link, a new
|
||||
symlink will be created instead of copying the file it points to.
|
||||
|
@ -224,7 +221,8 @@ def copyfile(src, dst, *, follow_symlinks=True):
|
|||
if _samefile(src, dst):
|
||||
raise SameFileError("{!r} and {!r} are the same file".format(src, dst))
|
||||
|
||||
for fn in [src, dst]:
|
||||
file_size = 0
|
||||
for i, fn in enumerate([src, dst]):
|
||||
try:
|
||||
st = os.stat(fn)
|
||||
except OSError:
|
||||
|
@ -234,26 +232,34 @@ def copyfile(src, dst, *, follow_symlinks=True):
|
|||
# XXX What about other special files? (sockets, devices...)
|
||||
if stat.S_ISFIFO(st.st_mode):
|
||||
raise SpecialFileError("`%s` is a named pipe" % fn)
|
||||
if _WINDOWS and i == 0:
|
||||
file_size = st.st_size
|
||||
|
||||
if not follow_symlinks and os.path.islink(src):
|
||||
os.symlink(os.readlink(src), dst)
|
||||
else:
|
||||
with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst:
|
||||
if _HAS_SENDFILE:
|
||||
# macOS
|
||||
if _HAS_FCOPYFILE:
|
||||
try:
|
||||
_fastcopy_fcopyfile(fsrc, fdst, posix._COPYFILE_DATA)
|
||||
return dst
|
||||
except _GiveupOnFastCopy:
|
||||
pass
|
||||
# Linux / Solaris
|
||||
elif _HAS_SENDFILE:
|
||||
try:
|
||||
_fastcopy_sendfile(fsrc, fdst)
|
||||
return dst
|
||||
except _GiveupOnFastCopy:
|
||||
pass
|
||||
# Windows, see:
|
||||
# https://github.com/python/cpython/pull/7160#discussion_r195405230
|
||||
elif _WINDOWS and file_size > 0:
|
||||
_copyfileobj_readinto(fsrc, fdst, min(file_size, COPY_BUFSIZE))
|
||||
return dst
|
||||
|
||||
if _HAS_FCOPYFILE:
|
||||
try:
|
||||
_fastcopy_osx(fsrc, fdst, posix._COPYFILE_DATA)
|
||||
return dst
|
||||
except _GiveupOnFastCopy:
|
||||
pass
|
||||
|
||||
_copybinfileobj(fsrc, fdst)
|
||||
copyfileobj(fsrc, fdst)
|
||||
|
||||
return dst
|
||||
|
||||
|
@ -1147,7 +1153,7 @@ if hasattr(os, 'statvfs'):
|
|||
used = (st.f_blocks - st.f_bfree) * st.f_frsize
|
||||
return _ntuple_diskusage(total, used, free)
|
||||
|
||||
elif os.name == 'nt':
|
||||
elif _WINDOWS:
|
||||
|
||||
__all__.append('disk_usage')
|
||||
_ntuple_diskusage = collections.namedtuple('usage', 'total used free')
|
||||
|
|
|
@ -33,7 +33,7 @@ from test import support
|
|||
from test.support import TESTFN, FakePath
|
||||
|
||||
TESTFN2 = TESTFN + "2"
|
||||
OSX = sys.platform.startswith("darwin")
|
||||
MACOS = sys.platform.startswith("darwin")
|
||||
try:
|
||||
import grp
|
||||
import pwd
|
||||
|
@ -1808,7 +1808,7 @@ class TestCopyFile(unittest.TestCase):
|
|||
|
||||
self.assertRaises(OSError, shutil.copyfile, 'srcfile', 'destfile')
|
||||
|
||||
@unittest.skipIf(OSX, "skipped on OSX")
|
||||
@unittest.skipIf(MACOS, "skipped on macOS")
|
||||
def test_w_dest_open_fails(self):
|
||||
|
||||
srcfile = self.Faux()
|
||||
|
@ -1828,7 +1828,7 @@ class TestCopyFile(unittest.TestCase):
|
|||
self.assertEqual(srcfile._exited_with[1].args,
|
||||
('Cannot open "destfile"',))
|
||||
|
||||
@unittest.skipIf(OSX, "skipped on OSX")
|
||||
@unittest.skipIf(MACOS, "skipped on macOS")
|
||||
def test_w_dest_close_fails(self):
|
||||
|
||||
srcfile = self.Faux()
|
||||
|
@ -1851,7 +1851,7 @@ class TestCopyFile(unittest.TestCase):
|
|||
self.assertEqual(srcfile._exited_with[1].args,
|
||||
('Cannot close',))
|
||||
|
||||
@unittest.skipIf(OSX, "skipped on OSX")
|
||||
@unittest.skipIf(MACOS, "skipped on macOS")
|
||||
def test_w_source_close_fails(self):
|
||||
|
||||
srcfile = self.Faux(True)
|
||||
|
@ -1892,6 +1892,80 @@ class TestCopyFile(unittest.TestCase):
|
|||
os.rmdir(dst_dir)
|
||||
|
||||
|
||||
class TestCopyFileObj(unittest.TestCase):
|
||||
FILESIZE = 2 * 1024 * 1024
|
||||
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
write_test_file(TESTFN, cls.FILESIZE)
|
||||
|
||||
@classmethod
|
||||
def tearDownClass(cls):
|
||||
support.unlink(TESTFN)
|
||||
support.unlink(TESTFN2)
|
||||
|
||||
def tearDown(self):
|
||||
support.unlink(TESTFN2)
|
||||
|
||||
@contextlib.contextmanager
|
||||
def get_files(self):
|
||||
with open(TESTFN, "rb") as src:
|
||||
with open(TESTFN2, "wb") as dst:
|
||||
yield (src, dst)
|
||||
|
||||
def assert_files_eq(self, src, dst):
|
||||
with open(src, 'rb') as fsrc:
|
||||
with open(dst, 'rb') as fdst:
|
||||
self.assertEqual(fsrc.read(), fdst.read())
|
||||
|
||||
def test_content(self):
|
||||
with self.get_files() as (src, dst):
|
||||
shutil.copyfileobj(src, dst)
|
||||
self.assert_files_eq(TESTFN, TESTFN2)
|
||||
|
||||
def test_file_not_closed(self):
|
||||
with self.get_files() as (src, dst):
|
||||
shutil.copyfileobj(src, dst)
|
||||
assert not src.closed
|
||||
assert not dst.closed
|
||||
|
||||
def test_file_offset(self):
|
||||
with self.get_files() as (src, dst):
|
||||
shutil.copyfileobj(src, dst)
|
||||
self.assertEqual(src.tell(), self.FILESIZE)
|
||||
self.assertEqual(dst.tell(), self.FILESIZE)
|
||||
|
||||
@unittest.skipIf(os.name != 'nt', "Windows only")
|
||||
def test_win_impl(self):
|
||||
# Make sure alternate Windows implementation is called.
|
||||
with unittest.mock.patch("shutil._copyfileobj_readinto") as m:
|
||||
shutil.copyfile(TESTFN, TESTFN2)
|
||||
assert m.called
|
||||
|
||||
# File size is 2 MiB but max buf size should be 1 MiB.
|
||||
self.assertEqual(m.call_args[0][2], 1 * 1024 * 1024)
|
||||
|
||||
# If file size < 1 MiB memoryview() length must be equal to
|
||||
# the actual file size.
|
||||
with tempfile.NamedTemporaryFile(delete=False) as f:
|
||||
f.write(b'foo')
|
||||
fname = f.name
|
||||
self.addCleanup(support.unlink, fname)
|
||||
with unittest.mock.patch("shutil._copyfileobj_readinto") as m:
|
||||
shutil.copyfile(fname, TESTFN2)
|
||||
self.assertEqual(m.call_args[0][2], 3)
|
||||
|
||||
# Empty files should not rely on readinto() variant.
|
||||
with tempfile.NamedTemporaryFile(delete=False) as f:
|
||||
pass
|
||||
fname = f.name
|
||||
self.addCleanup(support.unlink, fname)
|
||||
with unittest.mock.patch("shutil._copyfileobj_readinto") as m:
|
||||
shutil.copyfile(fname, TESTFN2)
|
||||
assert not m.called
|
||||
self.assert_files_eq(fname, TESTFN2)
|
||||
|
||||
|
||||
class _ZeroCopyFileTest(object):
|
||||
"""Tests common to all zero-copy APIs."""
|
||||
FILESIZE = (10 * 1024 * 1024) # 10 MiB
|
||||
|
@ -2111,12 +2185,12 @@ class TestZeroCopySendfile(_ZeroCopyFileTest, unittest.TestCase):
|
|||
shutil._HAS_SENDFILE = True
|
||||
|
||||
|
||||
@unittest.skipIf(not OSX, 'OSX only')
|
||||
class TestZeroCopyOSX(_ZeroCopyFileTest, unittest.TestCase):
|
||||
@unittest.skipIf(not MACOS, 'macOS only')
|
||||
class TestZeroCopyMACOS(_ZeroCopyFileTest, unittest.TestCase):
|
||||
PATCHPOINT = "posix._fcopyfile"
|
||||
|
||||
def zerocopy_fun(self, src, dst):
|
||||
return shutil._fastcopy_osx(src, dst, posix._COPYFILE_DATA)
|
||||
return shutil._fastcopy_fcopyfile(src, dst, posix._COPYFILE_DATA)
|
||||
|
||||
|
||||
class TermsizeTests(unittest.TestCase):
|
||||
|
|
|
@ -1,11 +1,10 @@
|
|||
:func:`shutil.copyfile`, :func:`shutil.copy`, :func:`shutil.copy2`,
|
||||
:func:`shutil.copytree` and :func:`shutil.move` use platform-specific
|
||||
fast-copy syscalls on Linux, Solaris and OSX in order to copy the file
|
||||
more efficiently. All other platforms not using such technique will rely on a
|
||||
faster :func:`shutil.copyfile` implementation using :func:`memoryview`,
|
||||
:class:`bytearray` and
|
||||
:meth:`BufferedIOBase.readinto() <io.BufferedIOBase.readinto>`.
|
||||
Finally, :func:`shutil.copyfile` default buffer size on Windows was increased
|
||||
from 16KB to 1MB. The speedup for copying a 512MB file is about +26% on Linux,
|
||||
+50% on OSX and +38% on Windows. Also, much less CPU cycles are consumed
|
||||
fast-copy syscalls on Linux, Solaris and macOS in order to copy the file
|
||||
more efficiently.
|
||||
On Windows :func:`shutil.copyfile` uses a bigger default buffer size (1 MiB
|
||||
instead of 16 KiB) and a :func:`memoryview`-based variant of
|
||||
:func:`shutil.copyfileobj` is used.
|
||||
The speedup for copying a 512MiB file is about +26% on Linux, +50% on macOS and
|
||||
+40% on Windows. Also, much less CPU cycles are consumed.
|
||||
(Contributed by Giampaolo Rodola' in :issue:`25427`.)
|
||||
|
|
|
@ -3859,7 +3859,7 @@ PyDoc_STRVAR(os__fcopyfile__doc__,
|
|||
"_fcopyfile($module, infd, outfd, flags, /)\n"
|
||||
"--\n"
|
||||
"\n"
|
||||
"Efficiently copy content or metadata of 2 regular file descriptors (OSX).");
|
||||
"Efficiently copy content or metadata of 2 regular file descriptors (macOS).");
|
||||
|
||||
#define OS__FCOPYFILE_METHODDEF \
|
||||
{"_fcopyfile", (PyCFunction)os__fcopyfile, METH_FASTCALL, os__fcopyfile__doc__},
|
||||
|
@ -6627,4 +6627,4 @@ exit:
|
|||
#ifndef OS_GETRANDOM_METHODDEF
|
||||
#define OS_GETRANDOM_METHODDEF
|
||||
#endif /* !defined(OS_GETRANDOM_METHODDEF) */
|
||||
/*[clinic end generated code: output=b5d1ec71bc6f0651 input=a9049054013a1b77]*/
|
||||
/*[clinic end generated code: output=47fb6a3e88cba6d9 input=a9049054013a1b77]*/
|
||||
|
|
|
@ -8774,12 +8774,12 @@ os._fcopyfile
|
|||
flags: int
|
||||
/
|
||||
|
||||
Efficiently copy content or metadata of 2 regular file descriptors (OSX).
|
||||
Efficiently copy content or metadata of 2 regular file descriptors (macOS).
|
||||
[clinic start generated code]*/
|
||||
|
||||
static PyObject *
|
||||
os__fcopyfile_impl(PyObject *module, int infd, int outfd, int flags)
|
||||
/*[clinic end generated code: output=8e8885c721ec38e3 input=aeb9456804eec879]*/
|
||||
/*[clinic end generated code: output=8e8885c721ec38e3 input=69e0770e600cb44f]*/
|
||||
{
|
||||
int ret;
|
||||
|
||||
|
|
Loading…
Reference in New Issue