From a371a7e03e43e08cae70235a71904989c0f57a5e Mon Sep 17 00:00:00 2001 From: Noam Cohen Date: Thu, 20 Oct 2022 12:08:54 +0300 Subject: [PATCH] gh-95023: Added os.setns and os.unshare functions (#95046) Added os.setns and os.unshare to easily switch between namespaces on Linux. Co-authored-by: Christian Heimes Co-authored-by: CAM Gerlach Co-authored-by: Victor Stinner --- Doc/library/os.rst | 81 ++++++++++ Include/internal/pycore_global_strings.h | 1 + .../internal/pycore_runtime_init_generated.h | 7 + Lib/test/test_posix.py | 47 ++++++ Misc/ACKS | 1 + ...2-07-20-09-04-55.gh-issue-95023.bs-xd7.rst | 1 + Modules/clinic/posixmodule.c.h | 151 +++++++++++++++++- Modules/posixmodule.c | 107 +++++++++++++ configure | 14 ++ configure.ac | 3 + pyconfig.h.in | 6 + 11 files changed, 418 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-07-20-09-04-55.gh-issue-95023.bs-xd7.rst diff --git a/Doc/library/os.rst b/Doc/library/os.rst index 43066fa1e13..0f0fb55e315 100644 --- a/Doc/library/os.rst +++ b/Doc/library/os.rst @@ -590,6 +590,44 @@ process and user. See the documentation for :func:`getgroups` for cases where it may not return the same group list set by calling setgroups(). +.. function:: setns(fd, nstype=0) + + Reassociate the current thread with a Linux namespace. + See the :manpage:`setns(2)` and :manpage:`namespaces(7)` man pages for more + details. + + If *fd* refers to a :file:`/proc/{pid}/ns/` link, ``setns()`` reassociates the + calling thread with the namespace associated with that link, + and *nstype* may be set to one of the + :ref:`CLONE_NEW* constants ` + to impose constraints on the operation + (``0`` means no constraints). + + Since Linux 5.8, *fd* may refer to a PID file descriptor obtained from + :func:`~os.pidfd_open`. In this case, ``setns()`` reassociates the calling thread + into one or more of the same namespaces as the thread referred to by *fd*. + This is subject to any constraints imposed by *nstype*, + which is a bit mask combining one or more of the + :ref:`CLONE_NEW* constants `, + e.g. ``setns(fd, os.CLONE_NEWUTS | os.CLONE_NEWPID)``. + The caller's memberships in unspecified namespaces are left unchanged. + + *fd* can be any object with a :meth:`~io.IOBase.fileno` method, or a raw file descriptor. + + This example reassociates the thread with the ``init`` process's network namespace:: + + fd = os.open("/proc/1/ns/net", os.O_RDONLY) + os.setns(fd, os.CLONE_NEWNET) + os.close(fd) + + .. availability:: Linux >= 3.0 with glibc >= 2.14. + + .. versionadded:: 3.12 + + .. seealso:: + + The :func:`~os.unshare` function. + .. function:: setpgrp() Call the system call :c:func:`setpgrp` or ``setpgrp(0, 0)`` depending on @@ -756,6 +794,49 @@ process and user. The function is now always available and is also available on Windows. +.. function:: unshare(flags) + + Disassociate parts of the process execution context, and move them into a + newly created namespace. + See the :manpage:`unshare(2)` + man page for more details. + The *flags* argument is a bit mask, combining zero or more of the + :ref:`CLONE_* constants `, + that specifies which parts of the execution context should be + unshared from their existing associations and moved to a new namespace. + If the *flags* argument is ``0``, no changes are made to the calling process's + execution context. + + .. availability:: Linux >= 2.6.16. + + .. versionadded:: 3.12 + + .. seealso:: + + The :func:`~os.setns` function. + +.. _os-unshare-clone-flags: + +Flags to the :func:`unshare` function, if the implementation supports them. +See :manpage:`unshare(2)` in the Linux manual +for their exact effect and availability. + +.. data:: CLONE_FILES + CLONE_FS + CLONE_NEWCGROUP + CLONE_NEWIPC + CLONE_NEWNET + CLONE_NEWNS + CLONE_NEWPID + CLONE_NEWTIME + CLONE_NEWUSER + CLONE_NEWUTS + CLONE_SIGHAND + CLONE_SYSVSEM + CLONE_THREAD + CLONE_VM + + .. _os-newstreams: File Object Creation diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index 811cfc147fc..9c716a3012f 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -489,6 +489,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(node_depth) STRUCT_FOR_ID(node_offset) STRUCT_FOR_ID(ns) + STRUCT_FOR_ID(nstype) STRUCT_FOR_ID(number) STRUCT_FOR_ID(obj) STRUCT_FOR_ID(object) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 8ce95884ccd..55c7c9e3194 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -998,6 +998,7 @@ extern "C" { INIT_ID(node_depth), \ INIT_ID(node_offset), \ INIT_ID(ns), \ + INIT_ID(nstype), \ INIT_ID(number), \ INIT_ID(obj), \ INIT_ID(object), \ @@ -2307,6 +2308,8 @@ _PyUnicode_InitStaticStrings(void) { PyUnicode_InternInPlace(&string); string = &_Py_ID(ns); PyUnicode_InternInPlace(&string); + string = &_Py_ID(nstype); + PyUnicode_InternInPlace(&string); string = &_Py_ID(number); PyUnicode_InternInPlace(&string); string = &_Py_ID(obj); @@ -6546,6 +6549,10 @@ _PyStaticObjects_CheckRefcnt(void) { _PyObject_Dump((PyObject *)&_Py_ID(ns)); Py_FatalError("immortal object has less refcnt than expected _PyObject_IMMORTAL_REFCNT"); }; + if (Py_REFCNT((PyObject *)&_Py_ID(nstype)) < _PyObject_IMMORTAL_REFCNT) { + _PyObject_Dump((PyObject *)&_Py_ID(nstype)); + Py_FatalError("immortal object has less refcnt than expected _PyObject_IMMORTAL_REFCNT"); + }; if (Py_REFCNT((PyObject *)&_Py_ID(number)) < _PyObject_IMMORTAL_REFCNT) { _PyObject_Dump((PyObject *)&_Py_ID(number)); Py_FatalError("immortal object has less refcnt than expected _PyObject_IMMORTAL_REFCNT"); diff --git a/Lib/test/test_posix.py b/Lib/test/test_posix.py index be561afc4cf..442dec8b280 100644 --- a/Lib/test/test_posix.py +++ b/Lib/test/test_posix.py @@ -2191,6 +2191,53 @@ class TestPosixWeaklinking(unittest.TestCase): os.utime("path", dir_fd=0) +class NamespacesTests(unittest.TestCase): + """Tests for os.unshare() and os.setns().""" + + @unittest.skipUnless(hasattr(os, 'unshare'), 'needs os.unshare()') + @unittest.skipUnless(hasattr(os, 'setns'), 'needs os.setns()') + @unittest.skipUnless(os.path.exists('/proc/self/ns/uts'), 'need /proc/self/ns/uts') + @support.requires_linux_version(3, 0, 0) + def test_unshare_setns(self): + code = """if 1: + import errno + import os + import sys + fd = os.open('/proc/self/ns/uts', os.O_RDONLY) + try: + original = os.readlink('/proc/self/ns/uts') + try: + os.unshare(os.CLONE_NEWUTS) + except OSError as e: + if e.errno == errno.ENOSPC: + # skip test if limit is exceeded + sys.exit() + raise + new = os.readlink('/proc/self/ns/uts') + if original == new: + raise Exception('os.unshare failed') + os.setns(fd, os.CLONE_NEWUTS) + restored = os.readlink('/proc/self/ns/uts') + if original != restored: + raise Exception('os.setns failed') + except PermissionError: + # The calling process did not have the required privileges + # for this operation + pass + except OSError as e: + # Skip the test on these errors: + # - ENOSYS: syscall not available + # - EINVAL: kernel was not configured with the CONFIG_UTS_NS option + # - ENOMEM: not enough memory + if e.errno not in (errno.ENOSYS, errno.EINVAL, errno.ENOMEM): + raise + finally: + os.close(fd) + """ + + assert_python_ok("-c", code) + + def tearDownModule(): support.reap_children() diff --git a/Misc/ACKS b/Misc/ACKS index ec5e326847b..98531485157 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -344,6 +344,7 @@ Hervé Coatanhay Riccardo Coccioli Nick Coghlan Josh Cogliati +Noam Cohen Dave Cole Terrence Cole Benjamin Collar diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-07-20-09-04-55.gh-issue-95023.bs-xd7.rst b/Misc/NEWS.d/next/Core and Builtins/2022-07-20-09-04-55.gh-issue-95023.bs-xd7.rst new file mode 100644 index 00000000000..bf0558ba79c --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-07-20-09-04-55.gh-issue-95023.bs-xd7.rst @@ -0,0 +1 @@ +Implement :func:`os.setns` and :func:`os.unshare` for Linux. Patch by Noam Cohen. diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h index 31bd01d1c3c..1ad96ea296e 100644 --- a/Modules/clinic/posixmodule.c.h +++ b/Modules/clinic/posixmodule.c.h @@ -5211,6 +5211,147 @@ exit: #endif /* (defined(__linux__) && defined(__NR_pidfd_open)) */ +#if defined(HAVE_SETNS) + +PyDoc_STRVAR(os_setns__doc__, +"setns($module, /, fd, nstype=0)\n" +"--\n" +"\n" +"Move the calling thread into different namespaces.\n" +"\n" +" fd\n" +" A file descriptor to a namespace.\n" +" nstype\n" +" Type of namespace."); + +#define OS_SETNS_METHODDEF \ + {"setns", _PyCFunction_CAST(os_setns), METH_FASTCALL|METH_KEYWORDS, os_setns__doc__}, + +static PyObject * +os_setns_impl(PyObject *module, int fd, int nstype); + +static PyObject * +os_setns(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 2 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(fd), &_Py_ID(nstype), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"fd", "nstype", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "setns", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[2]; + Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; + int fd; + int nstype = 0; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 2, 0, argsbuf); + if (!args) { + goto exit; + } + if (!_PyLong_FileDescriptor_Converter(args[0], &fd)) { + goto exit; + } + if (!noptargs) { + goto skip_optional_pos; + } + nstype = _PyLong_AsInt(args[1]); + if (nstype == -1 && PyErr_Occurred()) { + goto exit; + } +skip_optional_pos: + return_value = os_setns_impl(module, fd, nstype); + +exit: + return return_value; +} + +#endif /* defined(HAVE_SETNS) */ + +#if defined(HAVE_UNSHARE) + +PyDoc_STRVAR(os_unshare__doc__, +"unshare($module, /, flags)\n" +"--\n" +"\n" +"Disassociate parts of a process (or thread) execution context.\n" +"\n" +" flags\n" +" Namespaces to be unshared."); + +#define OS_UNSHARE_METHODDEF \ + {"unshare", _PyCFunction_CAST(os_unshare), METH_FASTCALL|METH_KEYWORDS, os_unshare__doc__}, + +static PyObject * +os_unshare_impl(PyObject *module, int flags); + +static PyObject * +os_unshare(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(flags), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"flags", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "unshare", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; + int flags; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf); + if (!args) { + goto exit; + } + flags = _PyLong_AsInt(args[0]); + if (flags == -1 && PyErr_Occurred()) { + goto exit; + } + return_value = os_unshare_impl(module, flags); + +exit: + return return_value; +} + +#endif /* defined(HAVE_UNSHARE) */ + #if (defined(HAVE_READLINK) || defined(MS_WINDOWS)) PyDoc_STRVAR(os_readlink__doc__, @@ -11085,6 +11226,14 @@ exit: #define OS_PIDFD_OPEN_METHODDEF #endif /* !defined(OS_PIDFD_OPEN_METHODDEF) */ +#ifndef OS_SETNS_METHODDEF + #define OS_SETNS_METHODDEF +#endif /* !defined(OS_SETNS_METHODDEF) */ + +#ifndef OS_UNSHARE_METHODDEF + #define OS_UNSHARE_METHODDEF +#endif /* !defined(OS_UNSHARE_METHODDEF) */ + #ifndef OS_READLINK_METHODDEF #define OS_READLINK_METHODDEF #endif /* !defined(OS_READLINK_METHODDEF) */ @@ -11368,4 +11517,4 @@ exit: #ifndef OS_WAITSTATUS_TO_EXITCODE_METHODDEF #define OS_WAITSTATUS_TO_EXITCODE_METHODDEF #endif /* !defined(OS_WAITSTATUS_TO_EXITCODE_METHODDEF) */ -/*[clinic end generated code: output=471ab8f2ad3d46b0 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=90f5e6995114e5ca input=a9049054013a1b77]*/ diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 39198cb7ddc..56ea319ecb3 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -8581,6 +8581,64 @@ os_pidfd_open_impl(PyObject *module, pid_t pid, unsigned int flags) #endif +#ifdef HAVE_SETNS +/*[clinic input] +os.setns + fd: fildes + A file descriptor to a namespace. + nstype: int = 0 + Type of namespace. + +Move the calling thread into different namespaces. +[clinic start generated code]*/ + +static PyObject * +os_setns_impl(PyObject *module, int fd, int nstype) +/*[clinic end generated code: output=5dbd055bfb66ecd0 input=42787871226bf3ee]*/ +{ + int res; + + Py_BEGIN_ALLOW_THREADS + res = setns(fd, nstype); + Py_END_ALLOW_THREADS + + if (res != 0) { + return posix_error(); + } + + Py_RETURN_NONE; +} +#endif + + +#ifdef HAVE_UNSHARE +/*[clinic input] +os.unshare + flags: int + Namespaces to be unshared. + +Disassociate parts of a process (or thread) execution context. +[clinic start generated code]*/ + +static PyObject * +os_unshare_impl(PyObject *module, int flags) +/*[clinic end generated code: output=1b3177906dd237ee input=9e065db3232b8b1b]*/ +{ + int res; + + Py_BEGIN_ALLOW_THREADS + res = unshare(flags); + Py_END_ALLOW_THREADS + + if (res != 0) { + return posix_error(); + } + + Py_RETURN_NONE; +} +#endif + + #if defined(HAVE_READLINK) || defined(MS_WINDOWS) /*[clinic input] os.readlink @@ -14945,6 +15003,8 @@ static PyMethodDef posix_methods[] = { OS__ADD_DLL_DIRECTORY_METHODDEF OS__REMOVE_DLL_DIRECTORY_METHODDEF OS_WAITSTATUS_TO_EXITCODE_METHODDEF + OS_SETNS_METHODDEF + OS_UNSHARE_METHODDEF {NULL, NULL} /* Sentinel */ }; @@ -15390,6 +15450,53 @@ all_ins(PyObject *m) #ifdef SCHED_FX if (PyModule_AddIntConstant(m, "SCHED_FX", SCHED_FSS)) return -1; #endif + +/* constants for namespaces */ +#if defined(HAVE_SETNS) || defined(HAVE_UNSHARE) +#ifdef CLONE_FS + if (PyModule_AddIntMacro(m, CLONE_FS)) return -1; +#endif +#ifdef CLONE_FILES + if (PyModule_AddIntMacro(m, CLONE_FILES)) return -1; +#endif +#ifdef CLONE_NEWNS + if (PyModule_AddIntMacro(m, CLONE_NEWNS)) return -1; +#endif +#ifdef CLONE_NEWCGROUP + if (PyModule_AddIntMacro(m, CLONE_NEWCGROUP)) return -1; +#endif +#ifdef CLONE_NEWUTS + if (PyModule_AddIntMacro(m, CLONE_NEWUTS)) return -1; +#endif +#ifdef CLONE_NEWIPC + if (PyModule_AddIntMacro(m, CLONE_NEWIPC)) return -1; +#endif +#ifdef CLONE_NEWUSER + if (PyModule_AddIntMacro(m, CLONE_NEWUSER)) return -1; +#endif +#ifdef CLONE_NEWPID + if (PyModule_AddIntMacro(m, CLONE_NEWPID)) return -1; +#endif +#ifdef CLONE_NEWNET + if (PyModule_AddIntMacro(m, CLONE_NEWNET)) return -1; +#endif +#ifdef CLONE_NEWTIME + if (PyModule_AddIntMacro(m, CLONE_NEWTIME)) return -1; +#endif +#ifdef CLONE_SYSVSEM + if (PyModule_AddIntMacro(m, CLONE_SYSVSEM)) return -1; +#endif +#ifdef CLONE_THREAD + if (PyModule_AddIntMacro(m, CLONE_THREAD)) return -1; +#endif +#ifdef CLONE_SIGHAND + if (PyModule_AddIntMacro(m, CLONE_SIGHAND)) return -1; +#endif +#ifdef CLONE_VM + if (PyModule_AddIntMacro(m, CLONE_VM)) return -1; +#endif +#endif + #endif #ifdef USE_XATTRS diff --git a/configure b/configure index 0e9f72faa7b..15d97963742 100755 --- a/configure +++ b/configure @@ -19111,6 +19111,20 @@ fi done +# check for namespace functions +for ac_func in setns unshare +do : + as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh` +ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var" +if eval test \"x\$"$as_ac_var"\" = x"yes"; then : + cat >>confdefs.h <<_ACEOF +#define `$as_echo "HAVE_$ac_func" | $as_tr_cpp` 1 +_ACEOF + +fi +done + + diff --git a/configure.ac b/configure.ac index a4f3b0ab846..c7945aaf850 100644 --- a/configure.ac +++ b/configure.ac @@ -5083,6 +5083,9 @@ AC_CHECK_FUNCS(setpgrp, []) ) +# check for namespace functions +AC_CHECK_FUNCS([setns unshare]) + dnl We search for both crypt and crypt_r as one or the other may be defined dnl libxcrypt provides and libcrypt with crypt_r() since dnl at least 3.1.1 from 2015. diff --git a/pyconfig.h.in b/pyconfig.h.in index 1ce09855f55..0d3c851a1af 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -1067,6 +1067,9 @@ /* Define to 1 if you have the `setlocale' function. */ #undef HAVE_SETLOCALE +/* Define to 1 if you have the `setns' function. */ +#undef HAVE_SETNS + /* Define to 1 if you have the `setpgid' function. */ #undef HAVE_SETPGID @@ -1437,6 +1440,9 @@ /* Define to 1 if you have the `unlinkat' function. */ #undef HAVE_UNLINKAT +/* Define to 1 if you have the `unshare' function. */ +#undef HAVE_UNSHARE + /* Define if you have a useable wchar_t type defined in wchar.h; useable means wchar_t must be an unsigned type with at least 16 bits. (see Include/unicodeobject.h). */