diff --git a/Doc/library/os.rst b/Doc/library/os.rst index 6c7ae0c785d..75a8ed44e5b 100644 --- a/Doc/library/os.rst +++ b/Doc/library/os.rst @@ -1419,6 +1419,38 @@ or `the MSDN `_ on Windo .. versionadded:: 3.3 +.. function:: splice(src, dst, count, offset_src=None, offset_dst=None, flags=0) + + Transfer *count* bytes from file descriptor *src*, starting from offset + *offset_src*, to file descriptor *dst*, starting from offset *offset_dst*. + At least one of the file descriptors must refer to a pipe. If *offset_src* + is ``None``, then *src* is read from the current position; respectively for + *offset_dst*. The offset associated with the file descriptor that refers to a + pipe must be ``None``. The files pointed to by *src* and *dst* must reside in + the same filesystem, otherwise an :exc:`OSError` is raised with + :attr:`~OSError.errno` set to :data:`errno.EXDEV`. + + This copy is done without the additional cost of transferring data + from the kernel to user space and then back into the kernel. Additionally, + some filesystems could implement extra optimizations. The copy is done as if + both files are opened as binary. + + Upon successful completion, returns the number of bytes spliced to or from + the pipe. A return value of 0 means end of input. If *src* refers to a + pipe, then this means that there was no data to transfer, and it would not + make sense to block because there are no writers connected to the write end + of the pipe. + + .. availability:: Linux kernel >= 2.6.17 and glibc >= 2.5 + + .. versionadded:: 3.10 + + +.. data:: SPLICE_F_MOVE + SPLICE_F_NONBLOCK + SPLICE_F_MORE + + .. function:: readv(fd, buffers) Read from a file descriptor *fd* into a number of mutable :term:`bytes-like diff --git a/Doc/whatsnew/3.10.rst b/Doc/whatsnew/3.10.rst index 87ba2e8af74..25b736ec325 100644 --- a/Doc/whatsnew/3.10.rst +++ b/Doc/whatsnew/3.10.rst @@ -233,6 +233,11 @@ Added a new function :func:`os.eventfd` and related helpers to wrap the ``eventfd2`` syscall on Linux. 
(Contributed by Christian Heimes in :issue:`41001`.) +Added :func:`os.splice`, which allows moving data between two file +descriptors without copying between kernel address space and user +address space, where one of the file descriptors must refer to a +pipe. (Contributed by Pablo Galindo in :issue:`41625`.) + py_compile ---------- diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py index 501b4a97556..d6da4617d50 100644 --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -381,6 +381,123 @@ class FileTests(unittest.TestCase): self.assertEqual(read[out_seek:], data[in_skip:in_skip+i]) + @unittest.skipUnless(hasattr(os, 'splice'), 'test needs os.splice()') + def test_splice_invalid_values(self): + with self.assertRaises(ValueError): + os.splice(0, 1, -10) + + @unittest.skipUnless(hasattr(os, 'splice'), 'test needs os.splice()') + def test_splice(self): + TESTFN2 = os_helper.TESTFN + ".3" + data = b'0123456789' + + create_file(os_helper.TESTFN, data) + self.addCleanup(os_helper.unlink, os_helper.TESTFN) + + in_file = open(os_helper.TESTFN, 'rb') + self.addCleanup(in_file.close) + in_fd = in_file.fileno() + + read_fd, write_fd = os.pipe() + self.addCleanup(lambda: os.close(read_fd)) + self.addCleanup(lambda: os.close(write_fd)) + + try: + i = os.splice(in_fd, write_fd, 5) + except OSError as e: + # Handle the case in which Python was compiled + # in a system with the syscall but without support + # in the kernel. + if e.errno != errno.ENOSYS: + raise + self.skipTest(e) + else: + # The number of copied bytes can be less than + # the number of bytes originally requested. 
+ self.assertIn(i, range(0, 6)); + + self.assertEqual(os.read(read_fd, 100), data[:i]) + + @unittest.skipUnless(hasattr(os, 'splice'), 'test needs os.splice()') + def test_splice_offset_in(self): + TESTFN4 = os_helper.TESTFN + ".4" + data = b'0123456789' + bytes_to_copy = 6 + in_skip = 3 + + create_file(os_helper.TESTFN, data) + self.addCleanup(os_helper.unlink, os_helper.TESTFN) + + in_file = open(os_helper.TESTFN, 'rb') + self.addCleanup(in_file.close) + in_fd = in_file.fileno() + + read_fd, write_fd = os.pipe() + self.addCleanup(lambda: os.close(read_fd)) + self.addCleanup(lambda: os.close(write_fd)) + + try: + i = os.splice(in_fd, write_fd, bytes_to_copy, offset_src=in_skip) + except OSError as e: + # Handle the case in which Python was compiled + # in a system with the syscall but without support + # in the kernel. + if e.errno != errno.ENOSYS: + raise + self.skipTest(e) + else: + # The number of copied bytes can be less than + # the number of bytes originally requested. + self.assertIn(i, range(0, bytes_to_copy+1)); + + read = os.read(read_fd, 100) + # 012 are skipped (in_skip) + # 345678 are copied in the file (in_skip + bytes_to_copy) + self.assertEqual(read, data[in_skip:in_skip+i]) + + @unittest.skipUnless(hasattr(os, 'splice'), 'test needs os.splice()') + def test_splice_offset_out(self): + TESTFN4 = os_helper.TESTFN + ".4" + data = b'0123456789' + bytes_to_copy = 6 + out_seek = 3 + + create_file(os_helper.TESTFN, data) + self.addCleanup(os_helper.unlink, os_helper.TESTFN) + + read_fd, write_fd = os.pipe() + self.addCleanup(lambda: os.close(read_fd)) + self.addCleanup(lambda: os.close(write_fd)) + os.write(write_fd, data) + + out_file = open(TESTFN4, 'w+b') + self.addCleanup(os_helper.unlink, TESTFN4) + self.addCleanup(out_file.close) + out_fd = out_file.fileno() + + try: + i = os.splice(read_fd, out_fd, bytes_to_copy, offset_dst=out_seek) + except OSError as e: + # Handle the case in which Python was compiled + # in a system with the syscall but without 
support + # in the kernel. + if e.errno != errno.ENOSYS: + raise + self.skipTest(e) + else: + # The number of copied bytes can be less than + # the number of bytes originally requested. + self.assertIn(i, range(0, bytes_to_copy+1)); + + with open(TESTFN4, 'rb') as in_file: + read = in_file.read() + # seeked bytes (5) are zero'ed + self.assertEqual(read[:out_seek], b'\x00'*out_seek) + # 012 are skipped (in_skip) + # 345678 are copied in the file (in_skip + bytes_to_copy) + self.assertEqual(read[out_seek:], data[:i]) + + # Test attributes on return values from os.*stat* family. class StatAttributeTests(unittest.TestCase): def setUp(self): diff --git a/Misc/NEWS.d/next/Library/2020-08-24-16-59-04.bpo-41625.Cc967V.rst b/Misc/NEWS.d/next/Library/2020-08-24-16-59-04.bpo-41625.Cc967V.rst new file mode 100644 index 00000000000..086788a7b01 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-08-24-16-59-04.bpo-41625.Cc967V.rst @@ -0,0 +1,2 @@ +Expose the :c:func:`splice` as :func:`os.splice` in the :mod:`os` module. +Patch by Pablo Galindo diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h index f5826e36812..ee4ee8ceac5 100644 --- a/Modules/clinic/posixmodule.c.h +++ b/Modules/clinic/posixmodule.c.h @@ -5674,6 +5674,106 @@ exit: #endif /* defined(HAVE_COPY_FILE_RANGE) */ +#if defined(HAVE_SPLICE) + +PyDoc_STRVAR(os_splice__doc__, +"splice($module, /, src, dst, count, offset_src=None, offset_dst=None,\n" +" flags=0)\n" +"--\n" +"\n" +"Transfer count bytes from one pipe to a descriptor or vice versa.\n" +"\n" +" src\n" +" Source file descriptor.\n" +" dst\n" +" Destination file descriptor.\n" +" count\n" +" Number of bytes to copy.\n" +" offset_src\n" +" Starting offset in src.\n" +" offset_dst\n" +" Starting offset in dst.\n" +" flags\n" +" Flags to modify the semantics of the call.\n" +"\n" +"If offset_src is None, then src is read from the current position;\n" +"respectively for offset_dst. 
The offset associated to the file\n" +"descriptor that refers to a pipe must be None."); + +#define OS_SPLICE_METHODDEF \ + {"splice", (PyCFunction)(void(*)(void))os_splice, METH_FASTCALL|METH_KEYWORDS, os_splice__doc__}, + +static PyObject * +os_splice_impl(PyObject *module, int src, int dst, Py_ssize_t count, + PyObject *offset_src, PyObject *offset_dst, + unsigned int flags); + +static PyObject * +os_splice(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + static const char * const _keywords[] = {"src", "dst", "count", "offset_src", "offset_dst", "flags", NULL}; + static _PyArg_Parser _parser = {NULL, _keywords, "splice", 0}; + PyObject *argsbuf[6]; + Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 3; + int src; + int dst; + Py_ssize_t count; + PyObject *offset_src = Py_None; + PyObject *offset_dst = Py_None; + unsigned int flags = 0; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 3, 6, 0, argsbuf); + if (!args) { + goto exit; + } + src = _PyLong_AsInt(args[0]); + if (src == -1 && PyErr_Occurred()) { + goto exit; + } + dst = _PyLong_AsInt(args[1]); + if (dst == -1 && PyErr_Occurred()) { + goto exit; + } + { + Py_ssize_t ival = -1; + PyObject *iobj = _PyNumber_Index(args[2]); + if (iobj != NULL) { + ival = PyLong_AsSsize_t(iobj); + Py_DECREF(iobj); + } + if (ival == -1 && PyErr_Occurred()) { + goto exit; + } + count = ival; + } + if (!noptargs) { + goto skip_optional_pos; + } + if (args[3]) { + offset_src = args[3]; + if (!--noptargs) { + goto skip_optional_pos; + } + } + if (args[4]) { + offset_dst = args[4]; + if (!--noptargs) { + goto skip_optional_pos; + } + } + if (!_PyLong_UnsignedInt_Converter(args[5], &flags)) { + goto exit; + } +skip_optional_pos: + return_value = os_splice_impl(module, src, dst, count, offset_src, offset_dst, flags); + +exit: + return return_value; +} + +#endif /* defined(HAVE_SPLICE) */ + #if defined(HAVE_MKFIFO) 
PyDoc_STRVAR(os_mkfifo__doc__, @@ -8864,6 +8964,10 @@ exit: #define OS_COPY_FILE_RANGE_METHODDEF #endif /* !defined(OS_COPY_FILE_RANGE_METHODDEF) */ +#ifndef OS_SPLICE_METHODDEF + #define OS_SPLICE_METHODDEF +#endif /* !defined(OS_SPLICE_METHODDEF) */ + #ifndef OS_MKFIFO_METHODDEF #define OS_MKFIFO_METHODDEF #endif /* !defined(OS_MKFIFO_METHODDEF) */ @@ -9059,4 +9163,4 @@ exit: #ifndef OS_WAITSTATUS_TO_EXITCODE_METHODDEF #define OS_WAITSTATUS_TO_EXITCODE_METHODDEF #endif /* !defined(OS_WAITSTATUS_TO_EXITCODE_METHODDEF) */ -/*[clinic end generated code: output=49b7ed768242ef7c input=a9049054013a1b77]*/ +/*[clinic end generated code: output=8a59e91178897267 input=a9049054013a1b77]*/ diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 0764453f412..ecab147d2f3 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -6521,7 +6521,6 @@ os_spawnve_impl(PyObject *module, int mode, path_t *path, PyObject *argv, #endif /* HAVE_SPAWNV */ - #ifdef HAVE_FORK /* Helper function to validate arguments. @@ -10370,6 +10369,75 @@ os_copy_file_range_impl(PyObject *module, int src, int dst, Py_ssize_t count, } #endif /* HAVE_COPY_FILE_RANGE*/ +#ifdef HAVE_SPLICE +/*[clinic input] + +os.splice + src: int + Source file descriptor. + dst: int + Destination file descriptor. + count: Py_ssize_t + Number of bytes to copy. + offset_src: object = None + Starting offset in src. + offset_dst: object = None + Starting offset in dst. + flags: unsigned_int = 0 + Flags to modify the semantics of the call. + +Transfer count bytes from one pipe to a descriptor or vice versa. + +If offset_src is None, then src is read from the current position; +respectively for offset_dst. The offset associated to the file +descriptor that refers to a pipe must be None. 
+[clinic start generated code]*/ + +static PyObject * +os_splice_impl(PyObject *module, int src, int dst, Py_ssize_t count, + PyObject *offset_src, PyObject *offset_dst, + unsigned int flags) +/*[clinic end generated code: output=d0386f25a8519dc5 input=047527c66c6d2e0a]*/ +{ /* Validate count, convert the optional offsets, then call splice() between the thread-state macros. */ + off_t offset_src_val, offset_dst_val; + off_t *p_offset_src = NULL; /* stays NULL when offset_src is None */ + off_t *p_offset_dst = NULL; /* stays NULL when offset_dst is None */ + Py_ssize_t ret; + int async_err = 0; /* set from PyErr_CheckSignals() in the retry loop below */ + + if (count < 0) { + PyErr_SetString(PyExc_ValueError, "negative value for 'count' not allowed"); + return NULL; + } + + if (offset_src != Py_None) { + if (!Py_off_t_converter(offset_src, &offset_src_val)) { + return NULL; + } + p_offset_src = &offset_src_val; + } + + if (offset_dst != Py_None) { + if (!Py_off_t_converter(offset_dst, &offset_dst_val)) { + return NULL; + } + p_offset_dst = &offset_dst_val; + } + + do { + Py_BEGIN_ALLOW_THREADS + ret = splice(src, p_offset_src, dst, p_offset_dst, count, flags); + Py_END_ALLOW_THREADS + } while (ret < 0 && errno == EINTR && !(async_err = PyErr_CheckSignals())); /* retry after EINTR unless PyErr_CheckSignals() returns non-zero */ + + if (ret < 0) { /* failure: posix_error() (presumably builds OSError from errno -- confirm) unless async_err is set */ + return (!async_err) ? 
posix_error() : NULL; + } + + return PyLong_FromSsize_t(ret); +} +#endif /* HAVE_SPLICE*/ + #ifdef HAVE_MKFIFO /*[clinic input] os.mkfifo @@ -14550,6 +14618,7 @@ static PyMethodDef posix_methods[] = { OS_POSIX_SPAWNP_METHODDEF OS_READLINK_METHODDEF OS_COPY_FILE_RANGE_METHODDEF + OS_SPLICE_METHODDEF OS_RENAME_METHODDEF OS_REPLACE_METHODDEF OS_RMDIR_METHODDEF @@ -15072,6 +15141,13 @@ all_ins(PyObject *m) if (PyModule_AddIntConstant(m, "RWF_APPEND", RWF_APPEND)) return -1; #endif +/* constants for splice */ +#ifdef HAVE_SPLICE + if (PyModule_AddIntConstant(m, "SPLICE_F_MOVE", SPLICE_F_MOVE)) return -1; + if (PyModule_AddIntConstant(m, "SPLICE_F_NONBLOCK", SPLICE_F_NONBLOCK)) return -1; + if (PyModule_AddIntConstant(m, "SPLICE_F_MORE", SPLICE_F_MORE)) return -1; +#endif + /* constants for posix_spawn */ #ifdef HAVE_POSIX_SPAWN if (PyModule_AddIntConstant(m, "POSIX_SPAWN_OPEN", POSIX_SPAWN_OPEN)) return -1; diff --git a/aclocal.m4 b/aclocal.m4 index 99913e7f3b8..c003f3c7055 100644 --- a/aclocal.m4 +++ b/aclocal.m4 @@ -67,7 +67,7 @@ AS_VAR_POPDEF([CACHEVAR])dnl ])dnl AX_CHECK_COMPILE_FLAGS # pkg.m4 - Macros to locate and utilise pkg-config. -*- Autoconf -*- -# serial 11 (pkg-config-0.29.1) +# serial 12 (pkg-config-0.29.2) dnl Copyright © 2004 Scott James Remnant . dnl Copyright © 2012-2015 Dan Nicholson @@ -109,7 +109,7 @@ dnl dnl See the "Since" comment for each macro you use to see what version dnl of the macros you require. 
m4_defun([PKG_PREREQ], -[m4_define([PKG_MACROS_VERSION], [0.29.1]) +[m4_define([PKG_MACROS_VERSION], [0.29.2]) m4_if(m4_version_compare(PKG_MACROS_VERSION, [$1]), -1, [m4_fatal([pkg.m4 version $1 or higher is required but ]PKG_MACROS_VERSION[ found])]) ])dnl PKG_PREREQ @@ -210,7 +210,7 @@ AC_ARG_VAR([$1][_CFLAGS], [C compiler flags for $1, overriding pkg-config])dnl AC_ARG_VAR([$1][_LIBS], [linker flags for $1, overriding pkg-config])dnl pkg_failed=no -AC_MSG_CHECKING([for $1]) +AC_MSG_CHECKING([for $2]) _PKG_CONFIG([$1][_CFLAGS], [cflags], [$2]) _PKG_CONFIG([$1][_LIBS], [libs], [$2]) @@ -220,11 +220,11 @@ and $1[]_LIBS to avoid the need to call pkg-config. See the pkg-config man page for more details.]) if test $pkg_failed = yes; then - AC_MSG_RESULT([no]) + AC_MSG_RESULT([no]) _PKG_SHORT_ERRORS_SUPPORTED if test $_pkg_short_errors_supported = yes; then $1[]_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "$2" 2>&1` - else + else $1[]_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "$2" 2>&1` fi # Put the nasty error message in config.log where it belongs @@ -241,7 +241,7 @@ installed software in a non-standard prefix. _PKG_TEXT])[]dnl ]) elif test $pkg_failed = untried; then - AC_MSG_RESULT([no]) + AC_MSG_RESULT([no]) m4_default([$4], [AC_MSG_FAILURE( [The pkg-config script could not be found or is too old. Make sure it is in your PATH or set the PKG_CONFIG environment variable to the full @@ -342,73 +342,5 @@ AS_VAR_COPY([$1], [pkg_cv_][$1]) AS_VAR_IF([$1], [""], [$5], [$4])dnl ])dnl PKG_CHECK_VAR -dnl PKG_WITH_MODULES(VARIABLE-PREFIX, MODULES, -dnl [ACTION-IF-FOUND],[ACTION-IF-NOT-FOUND], -dnl [DESCRIPTION], [DEFAULT]) -dnl ------------------------------------------ -dnl -dnl Prepare a "--with-" configure option using the lowercase -dnl [VARIABLE-PREFIX] name, merging the behaviour of AC_ARG_WITH and -dnl PKG_CHECK_MODULES in a single macro. 
-AC_DEFUN([PKG_WITH_MODULES], -[ -m4_pushdef([with_arg], m4_tolower([$1])) - -m4_pushdef([description], - [m4_default([$5], [build with ]with_arg[ support])]) - -m4_pushdef([def_arg], [m4_default([$6], [auto])]) -m4_pushdef([def_action_if_found], [AS_TR_SH([with_]with_arg)=yes]) -m4_pushdef([def_action_if_not_found], [AS_TR_SH([with_]with_arg)=no]) - -m4_case(def_arg, - [yes],[m4_pushdef([with_without], [--without-]with_arg)], - [m4_pushdef([with_without],[--with-]with_arg)]) - -AC_ARG_WITH(with_arg, - AS_HELP_STRING(with_without, description[ @<:@default=]def_arg[@:>@]),, - [AS_TR_SH([with_]with_arg)=def_arg]) - -AS_CASE([$AS_TR_SH([with_]with_arg)], - [yes],[PKG_CHECK_MODULES([$1],[$2],$3,$4)], - [auto],[PKG_CHECK_MODULES([$1],[$2], - [m4_n([def_action_if_found]) $3], - [m4_n([def_action_if_not_found]) $4])]) - -m4_popdef([with_arg]) -m4_popdef([description]) -m4_popdef([def_arg]) - -])dnl PKG_WITH_MODULES - -dnl PKG_HAVE_WITH_MODULES(VARIABLE-PREFIX, MODULES, -dnl [DESCRIPTION], [DEFAULT]) -dnl ----------------------------------------------- -dnl -dnl Convenience macro to trigger AM_CONDITIONAL after PKG_WITH_MODULES -dnl check._[VARIABLE-PREFIX] is exported as make variable. -AC_DEFUN([PKG_HAVE_WITH_MODULES], -[ -PKG_WITH_MODULES([$1],[$2],,,[$3],[$4]) - -AM_CONDITIONAL([HAVE_][$1], - [test "$AS_TR_SH([with_]m4_tolower([$1]))" = "yes"]) -])dnl PKG_HAVE_WITH_MODULES - -dnl PKG_HAVE_DEFINE_WITH_MODULES(VARIABLE-PREFIX, MODULES, -dnl [DESCRIPTION], [DEFAULT]) -dnl ------------------------------------------------------ -dnl -dnl Convenience macro to run AM_CONDITIONAL and AC_DEFINE after -dnl PKG_WITH_MODULES check. HAVE_[VARIABLE-PREFIX] is exported as make -dnl and preprocessor variable. 
-AC_DEFUN([PKG_HAVE_DEFINE_WITH_MODULES], -[ -PKG_HAVE_WITH_MODULES([$1],[$2],[$3],[$4]) - -AS_IF([test "$AS_TR_SH([with_]m4_tolower([$1]))" = "yes"], - [AC_DEFINE([HAVE_][$1], 1, [Enable ]m4_tolower([$1])[ support])]) -])dnl PKG_HAVE_DEFINE_WITH_MODULES - m4_include([m4/ax_c_float_words_bigendian.m4]) m4_include([m4/ax_check_openssl.m4]) diff --git a/configure b/configure index 491869ef2cb..e665d135e67 100755 --- a/configure +++ b/configure @@ -11726,7 +11726,7 @@ for ac_func in alarm accept4 setitimer getitimer bind_textdomain_codeset chown \ sched_get_priority_max sched_setaffinity sched_setscheduler sched_setparam \ sched_rr_get_interval \ sigaction sigaltstack sigfillset siginterrupt sigpending sigrelse \ - sigtimedwait sigwait sigwaitinfo snprintf strftime strlcpy strsignal symlinkat sync \ + sigtimedwait sigwait sigwaitinfo snprintf splice strftime strlcpy strsignal symlinkat sync \ sysconf tcgetpgrp tcsetpgrp tempnam timegm times tmpfile tmpnam tmpnam_r \ truncate uname unlinkat utimensat utimes vfork waitid waitpid wait3 wait4 \ wcscoll wcsftime wcsxfrm wmemcmp writev _getpty rtpSpawn diff --git a/configure.ac b/configure.ac index 440cdd178e1..668715cdf8f 100644 --- a/configure.ac +++ b/configure.ac @@ -3684,7 +3684,7 @@ AC_CHECK_FUNCS(alarm accept4 setitimer getitimer bind_textdomain_codeset chown \ sched_get_priority_max sched_setaffinity sched_setscheduler sched_setparam \ sched_rr_get_interval \ sigaction sigaltstack sigfillset siginterrupt sigpending sigrelse \ - sigtimedwait sigwait sigwaitinfo snprintf strftime strlcpy strsignal symlinkat sync \ + sigtimedwait sigwait sigwaitinfo snprintf splice strftime strlcpy strsignal symlinkat sync \ sysconf tcgetpgrp tcsetpgrp tempnam timegm times tmpfile tmpnam tmpnam_r \ truncate uname unlinkat utimensat utimes vfork waitid waitpid wait3 wait4 \ wcscoll wcsftime wcsxfrm wmemcmp writev _getpty rtpSpawn) diff --git a/pyconfig.h.in b/pyconfig.h.in index 8a5e9455eca..6ff5fc968a3 100644 --- a/pyconfig.h.in +++ 
b/pyconfig.h.in @@ -1018,6 +1018,9 @@ /* Define to 1 if you have the header file. */ #undef HAVE_SPAWN_H +/* Define to 1 if you have the `splice' function. */ +#undef HAVE_SPLICE + /* Define if your compiler provides ssize_t */ #undef HAVE_SSIZE_T