From 6ec29e299b663662a1610a740c4c7f8066fc63a6 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Fri, 16 Dec 2011 14:46:36 +0100 Subject: [PATCH] Issue #8373: The filesystem path of AF_UNIX sockets now uses the filesystem encoding and the surrogateescape error handler, rather than UTF-8. Patch by David Watson. --- Doc/library/socket.rst | 18 ++++++++++-- Lib/test/test_socket.py | 63 ++++++++++++++++++++++++++++++++++++++++- Misc/NEWS | 4 +++ Modules/socketmodule.c | 25 ++++++++++++---- 4 files changed, 101 insertions(+), 9 deletions(-) diff --git a/Doc/library/socket.rst b/Doc/library/socket.rst index f507958fab7..77f5bb8636d 100644 --- a/Doc/library/socket.rst +++ b/Doc/library/socket.rst @@ -40,9 +40,23 @@ Socket families Depending on the system and the build options, various socket families are supported by this module. -Socket addresses are represented as follows: +The address format required by a particular socket object is automatically +selected based on the address family specified when the socket object was +created. Socket addresses are represented as follows: -- A single string is used for the :const:`AF_UNIX` address family. +- The address of an :const:`AF_UNIX` socket bound to a file system node + is represented as a string, using the file system encoding and the + ``'surrogateescape'`` error handler (see :pep:`383`). An address in + Linux's abstract namespace is returned as a :class:`bytes` object with + an initial null byte; note that sockets in this namespace can + communicate with normal file system sockets, so programs intended to + run on Linux may need to deal with both types of address. A string or + :class:`bytes` object can be used for either type of address when + passing it as an argument. + + .. versionchanged:: 3.3 + Previously, :const:`AF_UNIX` socket paths were assumed to use UTF-8 + encoding. - A pair ``(host, port)`` is used for the :const:`AF_INET` address family, where *host* is a string representing either a hostname in Internet domain diff --git a/Lib/test/test_socket.py b/Lib/test/test_socket.py index 221a507d334..78bb37cfce8 100644 --- a/Lib/test/test_socket.py +++ b/Lib/test/test_socket.py @@ -1538,7 +1538,6 @@ class BasicUDPTest(ThreadedUDPSocketTest): def _testRecvFromNegative(self): self.cli.sendto(MSG, 0, (HOST, self.port)) - # Tests for the sendmsg()/recvmsg() interface. Where possible, the # same test code is used with different families and types of socket # (e.g. stream, datagram), and tests using recvmsg() are repeated @@ -4241,6 +4240,66 @@ class TestLinuxAbstractNamespace(unittest.TestCase): with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as s: self.assertRaises(socket.error, s.bind, address) + def testStrName(self): + # Check that an abstract name can be passed as a string. + s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + try: + s.bind("\x00python\x00test\x00") + self.assertEqual(s.getsockname(), b"\x00python\x00test\x00") + finally: + s.close() + +class TestUnixDomain(unittest.TestCase): + + def setUp(self): + self.sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + + def tearDown(self): + self.sock.close() + + def encoded(self, path): + # Return the given path encoded in the file system encoding, + # or skip the test if this is not possible. + try: + return os.fsencode(path) + except UnicodeEncodeError: + self.skipTest( + "Pathname {0!a} cannot be represented in file " + "system encoding {1!r}".format( + path, sys.getfilesystemencoding())) + + def testStrAddr(self): + # Test binding to and retrieving a normal string pathname. + path = os.path.abspath(support.TESTFN) + self.sock.bind(path) + self.addCleanup(support.unlink, path) + self.assertEqual(self.sock.getsockname(), path) + + def testBytesAddr(self): + # Test binding to a bytes pathname. + path = os.path.abspath(support.TESTFN) + self.sock.bind(self.encoded(path)) + self.addCleanup(support.unlink, path) + self.assertEqual(self.sock.getsockname(), path) + + def testSurrogateescapeBind(self): + # Test binding to a valid non-ASCII pathname, with the + # non-ASCII bytes supplied using surrogateescape encoding. + path = os.path.abspath(support.TESTFN_UNICODE) + b = self.encoded(path) + self.sock.bind(b.decode("ascii", "surrogateescape")) + self.addCleanup(support.unlink, path) + self.assertEqual(self.sock.getsockname(), path) + + def testUnencodableAddr(self): + # Test binding to a pathname that cannot be encoded in the + # file system encoding. + if support.TESTFN_UNENCODABLE is None: + self.skipTest("No unencodable filename available") + path = os.path.abspath(support.TESTFN_UNENCODABLE) + self.sock.bind(path) + self.addCleanup(support.unlink, path) + self.assertEqual(self.sock.getsockname(), path) @unittest.skipUnless(thread, 'Threading required for this test.') class BufferIOTest(SocketConnectedTest): @@ -4517,6 +4576,8 @@ def test_main(): ]) if hasattr(socket, "socketpair"): tests.append(BasicSocketPairTest) + if hasattr(socket, "AF_UNIX"): + tests.append(TestUnixDomain) if sys.platform == 'linux': tests.append(TestLinuxAbstractNamespace) if isTipcAvailable(): diff --git a/Misc/NEWS b/Misc/NEWS index 3f3d1df87c9..5be6990b0e5 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -419,6 +419,10 @@ Core and Builtins Library ------- +- Issue #8373: The filesystem path of AF_UNIX sockets now uses the filesystem + encoding and the surrogateescape error handler, rather than UTF-8. Patch + by David Watson. + - Issue #10350: Read and save errno before calling a function which might overwrite it. Original patch by Hallvard B Furuseth. diff --git a/Modules/socketmodule.c b/Modules/socketmodule.c index 40a18ed3c1d..c828d49e39e 100644 --- a/Modules/socketmodule.c +++ b/Modules/socketmodule.c @@ -1073,7 +1073,7 @@ makesockaddr(SOCKET_T sockfd, struct sockaddr *addr, size_t addrlen, int proto) #endif /* linux */ { /* regular NULL-terminated string */ - return PyUnicode_FromString(a->sun_path); + return PyUnicode_DecodeFSDefault(a->sun_path); } } #endif /* AF_UNIX */ @@ -1269,8 +1269,18 @@ getsockaddrarg(PySocketSockObject *s, PyObject *args, struct sockaddr_un* addr; char *path; int len; - if (!PyArg_Parse(args, "s#", &path, &len)) - return 0; + int retval = 0; + + /* PEP 383. Not using PyUnicode_FSConverter since we need to + allow embedded nulls on Linux. */ + if (PyUnicode_Check(args)) { + if ((args = PyUnicode_EncodeFSDefault(args)) == NULL) + return 0; + } + else + Py_INCREF(args); + if (!PyArg_Parse(args, "y#", &path, &len)) + goto unix_out; addr = (struct sockaddr_un*)addr_ret; #ifdef linux @@ -1279,7 +1289,7 @@ getsockaddrarg(PySocketSockObject *s, PyObject *args, if (len > sizeof addr->sun_path) { PyErr_SetString(PyExc_OSError, "AF_UNIX path too long"); - return 0; + goto unix_out; } } else @@ -1289,7 +1299,7 @@ getsockaddrarg(PySocketSockObject *s, PyObject *args, if (len >= sizeof addr->sun_path) { PyErr_SetString(PyExc_OSError, "AF_UNIX path too long"); - return 0; + goto unix_out; } addr->sun_path[len] = 0; } @@ -1300,7 +1310,10 @@ getsockaddrarg(PySocketSockObject *s, PyObject *args, #else *len_ret = len + offsetof(struct sockaddr_un, sun_path); #endif - return 1; + retval = 1; + unix_out: + Py_DECREF(args); + return retval; } #endif /* AF_UNIX */