bpo-33721: Make some os.path functions and pathlib.Path methods be tolerant to invalid paths. (#7695)

Such functions as os.path.exists(), os.path.lexists(), os.path.isdir(),
os.path.isfile(), os.path.islink(), and os.path.ismount() now return False
instead of raising ValueError or its subclasses UnicodeEncodeError
and UnicodeDecodeError for paths that contain characters or bytes
unrepresentative at the OS level.
This commit is contained in:
Serhiy Storchaka 2018-09-18 11:28:51 +03:00 committed by GitHub
parent 7bdf28265a
commit 0185f34ddc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 181 additions and 54 deletions

View File

@ -55,6 +55,14 @@ the :mod:`glob` module.)
* :mod:`macpath` for old-style MacOS paths
.. versionchanged:: 3.8
:func:`exists`, :func:`lexists`, :func:`isdir`, :func:`isfile`,
:func:`islink`, and :func:`ismount` now return ``False`` instead of
raising an exception for paths that contain characters or bytes
unrepresentable at the OS level.
.. function:: abspath(path)
Return a normalized absolutized version of the pathname *path*. On most

View File

@ -638,7 +638,17 @@ Methods
Concrete paths provide the following methods in addition to pure paths
methods. Many of these methods can raise an :exc:`OSError` if a system
call fails (for example because the path doesn't exist):
call fails (for example because the path doesn't exist).
.. versionchanged:: 3.8
:meth:`~Path.exists()`, :meth:`~Path.is_dir()`, :meth:`~Path.is_file()`,
:meth:`~Path.is_mount()`, :meth:`~Path.is_symlink()`,
:meth:`~Path.is_block_device()`, :meth:`~Path.is_char_device()`,
:meth:`~Path.is_fifo()`, :meth:`~Path.is_socket()` now return ``False``
instead of raising an exception for paths that contain characters
unrepresentable at the OS level.
.. classmethod:: Path.cwd()

View File

@ -112,6 +112,31 @@ New Modules
Improved Modules
================
os.path
-------
:mod:`os.path` functions that return a boolean result like
:func:`~os.path.exists`, :func:`~os.path.lexists`, :func:`~os.path.isdir`,
:func:`~os.path.isfile`, :func:`~os.path.islink`, and :func:`~os.path.ismount`
now return ``False`` instead of raising :exc:`ValueError` or its subclasses
:exc:`UnicodeEncodeError` and :exc:`UnicodeDecodeError` for paths that contain
characters or bytes unrepresentable at the OS level.
(Contributed by Serhiy Storchaka in :issue:`33721`.)
pathlib
-------
:mod:`pathlib.Path` methods that return a boolean result like
:meth:`~pathlib.Path.exists()`, :meth:`~pathlib.Path.is_dir()`,
:meth:`~pathlib.Path.is_file()`, :meth:`~pathlib.Path.is_mount()`,
:meth:`~pathlib.Path.is_symlink()`, :meth:`~pathlib.Path.is_block_device()`,
:meth:`~pathlib.Path.is_char_device()`, :meth:`~pathlib.Path.is_fifo()`,
:meth:`~pathlib.Path.is_socket()` now return ``False`` instead of raising
:exc:`ValueError` or its subclass :exc:`UnicodeEncodeError` for paths that
contain characters unrepresentable at the OS level.
(Contributed by Serhiy Storchaka in :issue:`33721`.)
Optimizations
=============

View File

@ -17,7 +17,7 @@ def exists(path):
"""Test whether a path exists. Returns False for broken symbolic links"""
try:
os.stat(path)
except OSError:
except (OSError, ValueError):
return False
return True
@ -28,7 +28,7 @@ def isfile(path):
"""Test whether a path is a regular file"""
try:
st = os.stat(path)
except OSError:
except (OSError, ValueError):
return False
return stat.S_ISREG(st.st_mode)
@ -40,7 +40,7 @@ def isdir(s):
"""Return true if the pathname refers to an existing directory."""
try:
st = os.stat(s)
except OSError:
except (OSError, ValueError):
return False
return stat.S_ISDIR(st.st_mode)

View File

@ -138,7 +138,7 @@ def lexists(path):
try:
st = os.lstat(path)
except OSError:
except (OSError, ValueError):
return False
return True

View File

@ -229,7 +229,7 @@ def islink(path):
"""
try:
st = os.lstat(path)
except (OSError, AttributeError):
except (OSError, ValueError, AttributeError):
return False
return stat.S_ISLNK(st.st_mode)
@ -239,7 +239,7 @@ def lexists(path):
"""Test whether a path exists. Returns True for broken symbolic links"""
try:
st = os.lstat(path)
except OSError:
except (OSError, ValueError):
return False
return True
@ -524,7 +524,7 @@ else: # use native Windows method on Windows
"""Return the absolute version of a path."""
try:
return _getfullpathname(path)
except OSError:
except (OSError, ValueError):
return _abspath_fallback(path)
# realpath is a no-op on systems without islink support

View File

@ -1331,6 +1331,9 @@ class Path(PurePath):
if e.errno not in _IGNORED_ERROS:
raise
return False
except ValueError:
# Non-encodable path
return False
return True
def is_dir(self):
@ -1345,6 +1348,9 @@ class Path(PurePath):
# Path doesn't exist or is a broken symlink
# (see https://bitbucket.org/pitrou/pathlib/issue/12/)
return False
except ValueError:
# Non-encodable path
return False
def is_file(self):
"""
@ -1359,6 +1365,9 @@ class Path(PurePath):
# Path doesn't exist or is a broken symlink
# (see https://bitbucket.org/pitrou/pathlib/issue/12/)
return False
except ValueError:
# Non-encodable path
return False
def is_mount(self):
"""
@ -1392,6 +1401,9 @@ class Path(PurePath):
raise
# Path doesn't exist
return False
except ValueError:
# Non-encodable path
return False
def is_block_device(self):
"""
@ -1405,6 +1417,9 @@ class Path(PurePath):
# Path doesn't exist or is a broken symlink
# (see https://bitbucket.org/pitrou/pathlib/issue/12/)
return False
except ValueError:
# Non-encodable path
return False
def is_char_device(self):
"""
@ -1418,6 +1433,9 @@ class Path(PurePath):
# Path doesn't exist or is a broken symlink
# (see https://bitbucket.org/pitrou/pathlib/issue/12/)
return False
except ValueError:
# Non-encodable path
return False
def is_fifo(self):
"""
@ -1431,6 +1449,9 @@ class Path(PurePath):
# Path doesn't exist or is a broken symlink
# (see https://bitbucket.org/pitrou/pathlib/issue/12/)
return False
except ValueError:
# Non-encodable path
return False
def is_socket(self):
"""
@ -1444,6 +1465,9 @@ class Path(PurePath):
# Path doesn't exist or is a broken symlink
# (see https://bitbucket.org/pitrou/pathlib/issue/12/)
return False
except ValueError:
# Non-encodable path
return False
def expanduser(self):
""" Return a new path with expanded ~ and ~user constructs

View File

@ -169,7 +169,7 @@ def islink(path):
"""Test whether a path is a symbolic link"""
try:
st = os.lstat(path)
except (OSError, AttributeError):
except (OSError, ValueError, AttributeError):
return False
return stat.S_ISLNK(st.st_mode)
@ -179,7 +179,7 @@ def lexists(path):
"""Test whether a path exists. Returns True for broken symbolic links"""
try:
os.lstat(path)
except OSError:
except (OSError, ValueError):
return False
return True
@ -191,7 +191,7 @@ def ismount(path):
"""Test whether a path is a mount point"""
try:
s1 = os.lstat(path)
except OSError:
except (OSError, ValueError):
# It doesn't exist -- so not a mount point. :-)
return False
else:
@ -206,7 +206,7 @@ def ismount(path):
parent = realpath(parent)
try:
s2 = os.lstat(parent)
except OSError:
except (OSError, ValueError):
return False
dev1 = s1.st_dev

View File

@ -138,10 +138,20 @@ class GenericTest:
self.assertIs(self.pathmodule.exists(filename), True)
self.assertIs(self.pathmodule.exists(bfilename), True)
self.assertIs(self.pathmodule.exists(filename + '\udfff'), False)
self.assertIs(self.pathmodule.exists(bfilename + b'\xff'), False)
self.assertIs(self.pathmodule.exists(filename + '\x00'), False)
self.assertIs(self.pathmodule.exists(bfilename + b'\x00'), False)
if self.pathmodule is not genericpath:
self.assertIs(self.pathmodule.lexists(filename), True)
self.assertIs(self.pathmodule.lexists(bfilename), True)
self.assertIs(self.pathmodule.lexists(filename + '\udfff'), False)
self.assertIs(self.pathmodule.lexists(bfilename + b'\xff'), False)
self.assertIs(self.pathmodule.lexists(filename + '\x00'), False)
self.assertIs(self.pathmodule.lexists(bfilename + b'\x00'), False)
@unittest.skipUnless(hasattr(os, "pipe"), "requires os.pipe()")
def test_exists_fd(self):
r, w = os.pipe()
@ -158,6 +168,11 @@ class GenericTest:
self.assertIs(self.pathmodule.isdir(filename), False)
self.assertIs(self.pathmodule.isdir(bfilename), False)
self.assertIs(self.pathmodule.isdir(filename + '\udfff'), False)
self.assertIs(self.pathmodule.isdir(bfilename + b'\xff'), False)
self.assertIs(self.pathmodule.isdir(filename + '\x00'), False)
self.assertIs(self.pathmodule.isdir(bfilename + b'\x00'), False)
try:
create_file(filename)
self.assertIs(self.pathmodule.isdir(filename), False)
@ -178,6 +193,11 @@ class GenericTest:
self.assertIs(self.pathmodule.isfile(filename), False)
self.assertIs(self.pathmodule.isfile(bfilename), False)
self.assertIs(self.pathmodule.isfile(filename + '\udfff'), False)
self.assertIs(self.pathmodule.isfile(bfilename + b'\xff'), False)
self.assertIs(self.pathmodule.isfile(filename + '\x00'), False)
self.assertIs(self.pathmodule.isfile(bfilename + b'\x00'), False)
try:
create_file(filename)
self.assertIs(self.pathmodule.isfile(filename), True)
@ -298,18 +318,20 @@ class TestGenericTest(GenericTest, unittest.TestCase):
continue
func = getattr(self.pathmodule, attr)
with self.subTest(attr=attr):
try:
if attr in ('exists', 'isdir', 'isfile'):
func('/tmp\udfffabcds')
except (OSError, UnicodeEncodeError):
pass
try:
func(b'/tmp\xffabcds')
except (OSError, UnicodeDecodeError):
pass
with self.assertRaisesRegex(ValueError, 'embedded null'):
func('/tmp\x00abcds')
with self.assertRaisesRegex(ValueError, 'embedded null'):
func(b'/tmp\x00abcds')
else:
with self.assertRaises((OSError, UnicodeEncodeError)):
func('/tmp\udfffabcds')
with self.assertRaises((OSError, UnicodeDecodeError)):
func(b'/tmp\xffabcds')
with self.assertRaisesRegex(ValueError, 'embedded null'):
func('/tmp\x00abcds')
with self.assertRaisesRegex(ValueError, 'embedded null'):
func(b'/tmp\x00abcds')
# Following TestCase is not supposed to be run from test_genericpath.
# It is inherited by other test modules (macpath, ntpath, posixpath).

View File

@ -1342,6 +1342,8 @@ class _BasePathTest(object):
self.assertIs(False, (p / 'linkA' / 'bah').exists())
self.assertIs(False, (p / 'foo').exists())
self.assertIs(False, P('/xyzzy').exists())
self.assertIs(False, P(BASE + '\udfff').exists())
self.assertIs(False, P(BASE + '\x00').exists())
def test_open_common(self):
p = self.cls(BASE)
@ -1866,7 +1868,9 @@ class _BasePathTest(object):
if support.can_symlink():
self.assertFalse((P / 'linkA').is_dir())
self.assertTrue((P / 'linkB').is_dir())
self.assertFalse((P/ 'brokenLink').is_dir())
self.assertFalse((P/ 'brokenLink').is_dir(), False)
self.assertIs((P / 'dirA\udfff').is_dir(), False)
self.assertIs((P / 'dirA\x00').is_dir(), False)
def test_is_file(self):
P = self.cls(BASE)
@ -1878,6 +1882,8 @@ class _BasePathTest(object):
self.assertTrue((P / 'linkA').is_file())
self.assertFalse((P / 'linkB').is_file())
self.assertFalse((P/ 'brokenLink').is_file())
self.assertIs((P / 'fileA\udfff').is_file(), False)
self.assertIs((P / 'fileA\x00').is_file(), False)
@only_posix
def test_is_mount(self):
@ -1890,6 +1896,8 @@ class _BasePathTest(object):
self.assertTrue(R.is_mount())
if support.can_symlink():
self.assertFalse((P / 'linkA').is_mount())
self.assertIs(self.cls('/\udfff').is_mount(), False)
self.assertIs(self.cls('/\x00').is_mount(), False)
def test_is_symlink(self):
P = self.cls(BASE)
@ -1901,6 +1909,11 @@ class _BasePathTest(object):
self.assertTrue((P / 'linkA').is_symlink())
self.assertTrue((P / 'linkB').is_symlink())
self.assertTrue((P/ 'brokenLink').is_symlink())
self.assertIs((P / 'fileA\udfff').is_file(), False)
self.assertIs((P / 'fileA\x00').is_file(), False)
if support.can_symlink():
self.assertIs((P / 'linkA\udfff').is_file(), False)
self.assertIs((P / 'linkA\x00').is_file(), False)
def test_is_fifo_false(self):
P = self.cls(BASE)
@ -1908,6 +1921,8 @@ class _BasePathTest(object):
self.assertFalse((P / 'dirA').is_fifo())
self.assertFalse((P / 'non-existing').is_fifo())
self.assertFalse((P / 'fileA' / 'bah').is_fifo())
self.assertIs((P / 'fileA\udfff').is_fifo(), False)
self.assertIs((P / 'fileA\x00').is_fifo(), False)
@unittest.skipUnless(hasattr(os, "mkfifo"), "os.mkfifo() required")
def test_is_fifo_true(self):
@ -1919,6 +1934,8 @@ class _BasePathTest(object):
self.assertTrue(P.is_fifo())
self.assertFalse(P.is_socket())
self.assertFalse(P.is_file())
self.assertIs(self.cls(BASE, 'myfifo\udfff').is_fifo(), False)
self.assertIs(self.cls(BASE, 'myfifo\x00').is_fifo(), False)
def test_is_socket_false(self):
P = self.cls(BASE)
@ -1926,6 +1943,8 @@ class _BasePathTest(object):
self.assertFalse((P / 'dirA').is_socket())
self.assertFalse((P / 'non-existing').is_socket())
self.assertFalse((P / 'fileA' / 'bah').is_socket())
self.assertIs((P / 'fileA\udfff').is_socket(), False)
self.assertIs((P / 'fileA\x00').is_socket(), False)
@unittest.skipUnless(hasattr(socket, "AF_UNIX"), "Unix sockets required")
def test_is_socket_true(self):
@ -1941,6 +1960,8 @@ class _BasePathTest(object):
self.assertTrue(P.is_socket())
self.assertFalse(P.is_fifo())
self.assertFalse(P.is_file())
self.assertIs(self.cls(BASE, 'mysock\udfff').is_socket(), False)
self.assertIs(self.cls(BASE, 'mysock\x00').is_socket(), False)
def test_is_block_device_false(self):
P = self.cls(BASE)
@ -1948,6 +1969,8 @@ class _BasePathTest(object):
self.assertFalse((P / 'dirA').is_block_device())
self.assertFalse((P / 'non-existing').is_block_device())
self.assertFalse((P / 'fileA' / 'bah').is_block_device())
self.assertIs((P / 'fileA\udfff').is_block_device(), False)
self.assertIs((P / 'fileA\x00').is_block_device(), False)
def test_is_char_device_false(self):
P = self.cls(BASE)
@ -1955,6 +1978,8 @@ class _BasePathTest(object):
self.assertFalse((P / 'dirA').is_char_device())
self.assertFalse((P / 'non-existing').is_char_device())
self.assertFalse((P / 'fileA' / 'bah').is_char_device())
self.assertIs((P / 'fileA\udfff').is_char_device(), False)
self.assertIs((P / 'fileA\x00').is_char_device(), False)
def test_is_char_device_true(self):
# Under Unix, /dev/null should generally be a char device
@ -1964,6 +1989,8 @@ class _BasePathTest(object):
self.assertTrue(P.is_char_device())
self.assertFalse(P.is_block_device())
self.assertFalse(P.is_file())
self.assertIs(self.cls('/dev/null\udfff').is_char_device(), False)
self.assertIs(self.cls('/dev/null\x00').is_char_device(), False)
def test_pickling_common(self):
p = self.cls(BASE, 'fileA')

View File

@ -153,9 +153,11 @@ class PosixPathTest(unittest.TestCase):
def test_islink(self):
self.assertIs(posixpath.islink(support.TESTFN + "1"), False)
self.assertIs(posixpath.lexists(support.TESTFN + "2"), False)
with open(support.TESTFN + "1", "wb") as f:
f.write(b"foo")
self.assertIs(posixpath.islink(support.TESTFN + "1"), False)
if support.can_symlink():
os.symlink(support.TESTFN + "1", support.TESTFN + "2")
self.assertIs(posixpath.islink(support.TESTFN + "2"), True)
@ -164,6 +166,11 @@ class PosixPathTest(unittest.TestCase):
self.assertIs(posixpath.exists(support.TESTFN + "2"), False)
self.assertIs(posixpath.lexists(support.TESTFN + "2"), True)
self.assertIs(posixpath.islink(support.TESTFN + "\udfff"), False)
self.assertIs(posixpath.islink(os.fsencode(support.TESTFN) + b"\xff"), False)
self.assertIs(posixpath.islink(support.TESTFN + "\x00"), False)
self.assertIs(posixpath.islink(os.fsencode(support.TESTFN) + b"\x00"), False)
def test_ismount(self):
self.assertIs(posixpath.ismount("/"), True)
self.assertIs(posixpath.ismount(b"/"), True)
@ -177,6 +184,11 @@ class PosixPathTest(unittest.TestCase):
finally:
safe_rmdir(ABSTFN)
self.assertIs(posixpath.ismount('/\udfff'), False)
self.assertIs(posixpath.ismount(b'/\xff'), False)
self.assertIs(posixpath.ismount('/\x00'), False)
self.assertIs(posixpath.ismount(b'/\x00'), False)
@unittest.skipUnless(support.can_symlink(),
"Test requires symlink support")
def test_ismount_symlinks(self):

View File

@ -159,13 +159,11 @@ class HelperFunctionsTests(unittest.TestCase):
# Issue 5258
pth_dir, pth_fn = self.make_pth("abc\x00def\n")
with captured_stderr() as err_out:
site.addpackage(pth_dir, pth_fn, set())
self.assertRegex(err_out.getvalue(), "line 1")
self.assertRegex(err_out.getvalue(),
re.escape(os.path.join(pth_dir, pth_fn)))
# XXX: ditto previous XXX comment.
self.assertRegex(err_out.getvalue(), 'Traceback')
self.assertRegex(err_out.getvalue(), 'ValueError')
self.assertFalse(site.addpackage(pth_dir, pth_fn, set()))
self.assertEqual(err_out.getvalue(), "")
for path in sys.path:
if isinstance(path, str):
self.assertNotIn("abc\x00def", path)
def test_addsitedir(self):
# Same tests for test_addpackage since addsitedir() essentially just

View File

@ -0,0 +1,12 @@
:mod:`os.path` functions that return a boolean result like
:func:`~os.path.exists`, :func:`~os.path.lexists`, :func:`~os.path.isdir`,
:func:`~os.path.isfile`, :func:`~os.path.islink`, and :func:`~os.path.ismount`,
and :mod:`pathlib.Path` methods that return a boolean result like
:meth:`~pathlib.Path.exists()`, :meth:`~pathlib.Path.is_dir()`,
:meth:`~pathlib.Path.is_file()`, :meth:`~pathlib.Path.is_mount()`,
:meth:`~pathlib.Path.is_symlink()`, :meth:`~pathlib.Path.is_block_device()`,
:meth:`~pathlib.Path.is_char_device()`, :meth:`~pathlib.Path.is_fifo()`,
:meth:`~pathlib.Path.is_socket()` now return ``False`` instead of raising
:exc:`ValueError` or its subclasses :exc:`UnicodeEncodeError` and
:exc:`UnicodeDecodeError` for paths that contain characters or bytes
unrepresentable at the OS level.

View File

@ -1005,27 +1005,6 @@ PyDoc_STRVAR(os__isdir__doc__,
#define OS__ISDIR_METHODDEF \
{"_isdir", (PyCFunction)os__isdir, METH_O, os__isdir__doc__},
static PyObject *
os__isdir_impl(PyObject *module, path_t *path);
static PyObject *
os__isdir(PyObject *module, PyObject *arg)
{
PyObject *return_value = NULL;
path_t path = PATH_T_INITIALIZE("_isdir", "path", 0, 0);
if (!PyArg_Parse(arg, "O&:_isdir", path_converter, &path)) {
goto exit;
}
return_value = os__isdir_impl(module, &path);
exit:
/* Cleanup for path */
path_cleanup(&path);
return return_value;
}
#endif /* defined(MS_WINDOWS) */
#if defined(MS_WINDOWS)
@ -6778,4 +6757,4 @@ exit:
#ifndef OS_GETRANDOM_METHODDEF
#define OS_GETRANDOM_METHODDEF
#endif /* !defined(OS_GETRANDOM_METHODDEF) */
/*[clinic end generated code: output=0f23518dd4482e66 input=a9049054013a1b77]*/
/*[clinic end generated code: output=40cac0135f846202 input=a9049054013a1b77]*/

View File

@ -3821,22 +3821,32 @@ cleanup:
/*[clinic input]
os._isdir
path: path_t
path as arg: object
/
Return true if the pathname refers to an existing directory.
[clinic start generated code]*/
static PyObject *
os__isdir_impl(PyObject *module, path_t *path)
/*[clinic end generated code: output=75f56f32720836cb input=5e0800149c0ad95f]*/
os__isdir(PyObject *module, PyObject *arg)
/*[clinic end generated code: output=404f334d85d4bf25 input=36cb6785874d479e]*/
{
DWORD attributes;
path_t path = PATH_T_INITIALIZE("_isdir", "path", 0, 0);
if (!path_converter(arg, &path)) {
if (PyErr_ExceptionMatches(PyExc_ValueError)) {
PyErr_Clear();
Py_RETURN_FALSE;
}
return NULL;
}
Py_BEGIN_ALLOW_THREADS
attributes = GetFileAttributesW(path->wide);
attributes = GetFileAttributesW(path.wide);
Py_END_ALLOW_THREADS
path_cleanup(&path);
if (attributes == INVALID_FILE_ATTRIBUTES)
Py_RETURN_FALSE;