From d33aa18f15de482a01988aabc75907328e1f9c9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EB=B0=95=EB=AC=B8=EC=8B=9D?= Date: Thu, 5 Oct 2023 23:49:07 +0900 Subject: [PATCH] gh-82367: Use `FindFirstFile` Win32 API in `ntpath.realpath()` (GH-110298) * Use `FindFirstFile` Win32 API to fix a bug where `ntpath.realpath()` breaks out of traversing a series of paths where a (handled) `ERROR_ACCESS_DENIED` or `ERROR_SHARING_VIOLATION` occurs. * Update docs to reflect that `ntpath.realpath()` eliminates MS-DOS style names. --- Doc/library/os.path.rst | 3 +- Doc/whatsnew/3.13.rst | 3 ++ Lib/ntpath.py | 16 ++++--- Lib/test/test_ntpath.py | 43 +++++++++++++++++++ Misc/ACKS | 1 + ...3-10-03-12-30-59.gh-issue-82367.nxwfMx.rst | 2 + Modules/clinic/posixmodule.c.h | 40 ++++++++++++++++- Modules/posixmodule.c | 32 ++++++++++++++ 8 files changed, 132 insertions(+), 8 deletions(-) create mode 100644 Misc/NEWS.d/next/Windows/2023-10-03-12-30-59.gh-issue-82367.nxwfMx.rst diff --git a/Doc/library/os.path.rst b/Doc/library/os.path.rst index 6f9e0853bc8..95933f56d50 100644 --- a/Doc/library/os.path.rst +++ b/Doc/library/os.path.rst @@ -377,7 +377,8 @@ the :mod:`glob` module.) Return the canonical path of the specified filename, eliminating any symbolic links encountered in the path (if they are supported by the operating - system). + system). On Windows, this function will also resolve MS-DOS (also called 8.3) + style names such as ``C:\PROGRA~1`` to ``C:\Program Files``. If a path doesn't exist or a symlink loop is encountered, and *strict* is ``True``, :exc:`OSError` is raised. If *strict* is ``False``, the path is diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 785deea1c1e..7a62963203e 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -97,6 +97,9 @@ Other Language Changes if supported. (Contributed by Victor Stinner in :gh:`109649`.) +* :func:`os.path.realpath` now resolves MS-DOS style file names even if + the file is not accessible. 
+ (Contributed by Moonsik Park in :gh:`82367`.) New Modules =========== diff --git a/Lib/ntpath.py b/Lib/ntpath.py index df3402d46c9..3061a4a5ef4 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -23,7 +23,6 @@ import stat import genericpath from genericpath import * - __all__ = ["normcase","isabs","join","splitdrive","splitroot","split","splitext", "basename","dirname","commonprefix","getsize","getmtime", "getatime","getctime", "islink","exists","lexists","isdir","isfile", @@ -601,7 +600,7 @@ else: # use native Windows method on Windows return _abspath_fallback(path) try: - from nt import _getfinalpathname, readlink as _nt_readlink + from nt import _findfirstfile, _getfinalpathname, readlink as _nt_readlink except ImportError: # realpath is a no-op on systems without _getfinalpathname support. realpath = abspath @@ -688,10 +687,15 @@ else: except OSError: # If we fail to readlink(), let's keep traversing pass - path, name = split(path) - # TODO (bpo-38186): Request the real file name from the directory - # entry using FindFirstFileW. For now, we will return the path - # as best we have it + # If we get these errors, try to get the real name of the file without accessing it. 
+ if ex.winerror in (1, 5, 32, 50, 87, 1920, 1921): + try: + name = _findfirstfile(path) + path, _ = split(path) + except OSError: + path, name = split(path) + else: + path, name = split(path) if path and not name: return path + tail tail = join(name, tail) if tail else name diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py index d91dcdfb0c5..3e710d1c6da 100644 --- a/Lib/test/test_ntpath.py +++ b/Lib/test/test_ntpath.py @@ -2,6 +2,7 @@ import inspect import ntpath import os import string +import subprocess import sys import unittest import warnings @@ -637,6 +638,48 @@ class TestNtpath(NtpathTestCase): with os_helper.change_cwd(test_dir_short): self.assertPathEqual(test_file_long, ntpath.realpath("file.txt")) + @unittest.skipUnless(HAVE_GETFINALPATHNAME, 'need _getfinalpathname') + def test_realpath_permission(self): + # Test whether python can resolve the real filename of a + # shortened file name even if it does not have permission to access it. + ABSTFN = ntpath.realpath(os_helper.TESTFN) + + os_helper.unlink(ABSTFN) + os_helper.rmtree(ABSTFN) + os.mkdir(ABSTFN) + self.addCleanup(os_helper.rmtree, ABSTFN) + + test_file = ntpath.join(ABSTFN, "LongFileName123.txt") + test_file_short = ntpath.join(ABSTFN, "LONGFI~1.TXT") + + with open(test_file, "wb") as f: + f.write(b"content") + # Automatic generation of short names may be disabled on + # NTFS volumes for the sake of performance. + # They're not supported at all on ReFS and exFAT. + subprocess.run( + # Try to set the short name manually. + ['fsutil.exe', 'file', 'setShortName', test_file, 'LONGFI~1.TXT'], + creationflags=subprocess.DETACHED_PROCESS + ) + + try: + self.assertPathEqual(test_file, ntpath.realpath(test_file_short)) + except AssertionError: + raise unittest.SkipTest('the filesystem seems to lack support for short filenames') + + # Deny the right to [S]YNCHRONIZE on the file to + # force nt._getfinalpathname to fail with ERROR_ACCESS_DENIED. 
+ p = subprocess.run( + ['icacls.exe', test_file, '/deny', '*S-1-5-32-545:(S)'], + creationflags=subprocess.DETACHED_PROCESS + ) + + if p.returncode: + raise unittest.SkipTest('failed to deny access to the test file') + + self.assertPathEqual(test_file, ntpath.realpath(test_file_short)) + def test_expandvars(self): with os_helper.EnvironmentVarGuard() as env: env.clear() diff --git a/Misc/ACKS b/Misc/ACKS index ccdfae66832..94cb1965676 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1373,6 +1373,7 @@ Peter Parente Alexandre Parenteau Dan Parisien HyeSoo Park +Moonsik Park William Park Claude Paroz Heikki Partanen diff --git a/Misc/NEWS.d/next/Windows/2023-10-03-12-30-59.gh-issue-82367.nxwfMx.rst b/Misc/NEWS.d/next/Windows/2023-10-03-12-30-59.gh-issue-82367.nxwfMx.rst new file mode 100644 index 00000000000..613ca075044 --- /dev/null +++ b/Misc/NEWS.d/next/Windows/2023-10-03-12-30-59.gh-issue-82367.nxwfMx.rst @@ -0,0 +1,2 @@ +:func:`os.path.realpath` now resolves MS-DOS style file names even if +the file is not accessible. Patch by Moonsik Park. 
diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h index fc39ab72bf2..0238d3a2f23 100644 --- a/Modules/clinic/posixmodule.c.h +++ b/Modules/clinic/posixmodule.c.h @@ -1848,6 +1848,40 @@ exit: #if defined(MS_WINDOWS) +PyDoc_STRVAR(os__findfirstfile__doc__, +"_findfirstfile($module, path, /)\n" +"--\n" +"\n" +"A function to get the real file name without accessing the file in Windows."); + +#define OS__FINDFIRSTFILE_METHODDEF \ + {"_findfirstfile", (PyCFunction)os__findfirstfile, METH_O, os__findfirstfile__doc__}, + +static PyObject * +os__findfirstfile_impl(PyObject *module, path_t *path); + +static PyObject * +os__findfirstfile(PyObject *module, PyObject *arg) +{ + PyObject *return_value = NULL; + path_t path = PATH_T_INITIALIZE("_findfirstfile", "path", 0, 0); + + if (!path_converter(arg, &path)) { + goto exit; + } + return_value = os__findfirstfile_impl(module, &path); + +exit: + /* Cleanup for path */ + path_cleanup(&path); + + return return_value; +} + +#endif /* defined(MS_WINDOWS) */ + +#if defined(MS_WINDOWS) + PyDoc_STRVAR(os__getvolumepathname__doc__, "_getvolumepathname($module, /, path)\n" "--\n" @@ -11451,6 +11485,10 @@ exit: #define OS__GETFINALPATHNAME_METHODDEF #endif /* !defined(OS__GETFINALPATHNAME_METHODDEF) */ +#ifndef OS__FINDFIRSTFILE_METHODDEF + #define OS__FINDFIRSTFILE_METHODDEF +#endif /* !defined(OS__FINDFIRSTFILE_METHODDEF) */ + #ifndef OS__GETVOLUMEPATHNAME_METHODDEF #define OS__GETVOLUMEPATHNAME_METHODDEF #endif /* !defined(OS__GETVOLUMEPATHNAME_METHODDEF) */ @@ -11986,4 +12024,4 @@ exit: #ifndef OS_WAITSTATUS_TO_EXITCODE_METHODDEF #define OS_WAITSTATUS_TO_EXITCODE_METHODDEF #endif /* !defined(OS_WAITSTATUS_TO_EXITCODE_METHODDEF) */ -/*[clinic end generated code: output=8b60de6ddb925bc3 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=a36904281a8a7507 input=a9049054013a1b77]*/ diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index d3c0aa6f3c5..2c32a45a532 100644 --- 
a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -4809,6 +4809,37 @@ cleanup: return result; } +/*[clinic input] +os._findfirstfile + path: path_t + / +A function to get the real file name without accessing the file in Windows. +[clinic start generated code]*/ + +static PyObject * +os__findfirstfile_impl(PyObject *module, path_t *path) +/*[clinic end generated code: output=106dd3f0779c83dd input=0734dff70f60e1a8]*/ +{ + PyObject *result; + HANDLE hFindFile; + WIN32_FIND_DATAW wFileData; + WCHAR *wRealFileName; + + Py_BEGIN_ALLOW_THREADS + hFindFile = FindFirstFileW(path->wide, &wFileData); + Py_END_ALLOW_THREADS + + if (hFindFile == INVALID_HANDLE_VALUE) { + path_error(path); + return NULL; + } + + wRealFileName = wFileData.cFileName; + result = PyUnicode_FromWideChar(wRealFileName, wcslen(wRealFileName)); + FindClose(hFindFile); + return result; +} + /*[clinic input] os._getvolumepathname @@ -15961,6 +15992,7 @@ static PyMethodDef posix_methods[] = { OS__GETFULLPATHNAME_METHODDEF OS__GETDISKUSAGE_METHODDEF OS__GETFINALPATHNAME_METHODDEF + OS__FINDFIRSTFILE_METHODDEF OS__GETVOLUMEPATHNAME_METHODDEF OS__PATH_SPLITROOT_METHODDEF OS__PATH_NORMPATH_METHODDEF