gh-101196: Make isdir/isfile/exists faster on Windows (GH-101324)

Co-authored-by: Eryk Sun <eryksun@gmail.com>
This commit is contained in:
Michael Droettboom 2023-02-08 09:34:24 -05:00 committed by GitHub
parent 3a88de7a0a
commit 86ebd5c3fa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 624 additions and 34 deletions

View File

@ -247,6 +247,10 @@ typedef Py_ssize_t Py_ssize_clean_t;
#define S_ISCHR(x) (((x) & S_IFMT) == S_IFCHR)
#endif
/* Fallback used only when no earlier header has defined S_ISLNK: evaluates to
   true when the file-type bits of mode x (x & S_IFMT) equal S_IFLNK. */
#ifndef S_ISLNK
#define S_ISLNK(x) (((x) & S_IFMT) == S_IFLNK)
#endif
#ifdef __cplusplus
/* Move this down here since some C++ #include's don't like to be included
inside an extern "C" */

View File

@ -7,7 +7,7 @@ import os
import stat
__all__ = ['commonprefix', 'exists', 'getatime', 'getctime', 'getmtime',
'getsize', 'isdir', 'isfile', 'samefile', 'sameopenfile',
'getsize', 'isdir', 'isfile', 'islink', 'samefile', 'sameopenfile',
'samestat']
@ -45,6 +45,18 @@ def isdir(s):
return stat.S_ISDIR(st.st_mode)
# Symbolic-link test shared by the platform path modules.
# Always answers False on systems where os.lstat doesn't exist.
def islink(path):
    """Return True if *path* is a symbolic link, False otherwise.

    Any OSError, ValueError or AttributeError raised by os.lstat() is
    treated as "not a link" instead of being propagated.
    """
    try:
        link_info = os.lstat(path)
    except (OSError, ValueError, AttributeError):
        is_link = False
    else:
        is_link = stat.S_ISLNK(link_info.st_mode)
    return is_link
def getsize(filename):
    """Return the st_size value that os.stat() reports for *filename*.

    Errors raised by os.stat() are not caught here.
    """
    file_info = os.stat(filename)
    return file_info.st_size

View File

@ -276,19 +276,6 @@ def dirname(p):
"""Returns the directory component of a pathname"""
return split(p)[0]
# Symbolic-link test.
# Always answers False on systems where os.lstat doesn't exist.
def islink(path):
    """Report whether *path* is a symbolic link.

    The answer is always false for Windows prior to 6.0, and whenever
    os.lstat() fails with OSError, ValueError or AttributeError.
    """
    try:
        lstat_result = os.lstat(path)
    except (OSError, ValueError, AttributeError):
        return False
    mode = lstat_result.st_mode
    return stat.S_ISLNK(mode)
# Is a path a junction?
@ -870,11 +857,13 @@ def commonpath(paths):
try:
# The genericpath.isdir implementation uses os.stat and checks the mode
# attribute to tell whether or not the path is a directory.
# This is overkill on Windows - just pass the path to GetFileAttributes
# and check the attribute from there.
from nt import _isdir as isdir
# The isdir(), isfile(), islink() and exists() implementations in
# genericpath use os.stat() or os.lstat(). This is overkill on Windows.
# Use simpler builtin functions if they are available.
from nt import _path_isdir as isdir
from nt import _path_isfile as isfile
from nt import _path_islink as islink
from nt import _path_exists as exists
except ImportError:
# Use genericpath.isdir as imported above.
# Use genericpath.* as imported above
pass

View File

@ -187,18 +187,6 @@ def dirname(p):
return head
# Does the path name a symbolic link?
# The answer is always False on systems where os.lstat doesn't exist.
def islink(path):
    """Test whether *path* is a symbolic link.

    A failing os.lstat() call (OSError, ValueError or AttributeError)
    yields False rather than an exception.
    """
    try:
        entry = os.lstat(path)
    except (OSError, ValueError, AttributeError):
        outcome = False
    else:
        outcome = stat.S_ISLNK(entry.st_mode)
    return outcome
# Is a path a junction?
def isjunction(path):

View File

@ -1,9 +1,10 @@
import inspect
import ntpath
import os
import sys
import unittest
import warnings
from test.support import os_helper
from test.support import cpython_only, os_helper
from test.support import TestFailed, is_emscripten
from test.support.os_helper import FakePath
from test import test_genericpath
@ -938,6 +939,35 @@ class TestNtpath(NtpathTestCase):
self.assertFalse(ntpath.isjunction('tmpdir'))
self.assertPathEqual(ntpath.realpath('testjunc'), ntpath.realpath('tmpdir'))
@unittest.skipIf(sys.platform != 'win32', "drive letters are a windows concept")
def test_isfile_driveletter(self):
drive = os.environ.get('SystemDrive')
if drive is None or len(drive) != 2 or drive[1] != ':':
raise unittest.SkipTest('SystemDrive is not defined or malformed')
self.assertFalse(os.path.isfile('\\\\.\\' + drive))
@unittest.skipIf(sys.platform != 'win32', "windows only")
def test_con_device(self):
self.assertFalse(os.path.isfile(r"\\.\CON"))
self.assertFalse(os.path.isdir(r"\\.\CON"))
self.assertFalse(os.path.islink(r"\\.\CON"))
self.assertTrue(os.path.exists(r"\\.\CON"))
@unittest.skipIf(sys.platform != 'win32', "Fast paths are only for win32")
@cpython_only
def test_fast_paths_in_use(self):
    # There are fast paths of these functions implemented in posixmodule.c.
    # Confirm that they are being used, and not the Python fallbacks in
    # genericpath.py.
    #
    # assertIs() is used for the identity checks so that a failure reports
    # both objects instead of a bare "False is not true".
    self.assertIs(os.path.isdir, nt._path_isdir)
    self.assertFalse(inspect.isfunction(os.path.isdir))
    self.assertIs(os.path.isfile, nt._path_isfile)
    self.assertFalse(inspect.isfunction(os.path.isfile))
    self.assertIs(os.path.islink, nt._path_islink)
    self.assertFalse(inspect.isfunction(os.path.islink))
    self.assertIs(os.path.exists, nt._path_exists)
    self.assertFalse(inspect.isfunction(os.path.exists))
class NtCommonTest(test_genericpath.CommonTest, unittest.TestCase):
pathmodule = ntpath

View File

@ -742,6 +742,7 @@ class StatAttributeTests(unittest.TestCase):
)
result = os.stat(fname)
self.assertNotEqual(result.st_size, 0)
self.assertTrue(os.path.isfile(fname))
@unittest.skipUnless(sys.platform == "win32", "Win32 specific tests")
def test_stat_block_device(self):
@ -2860,6 +2861,7 @@ class Win32SymlinkTests(unittest.TestCase):
self.assertEqual(st, os.stat(alias))
self.assertFalse(stat.S_ISLNK(st.st_mode))
self.assertEqual(st.st_reparse_tag, stat.IO_REPARSE_TAG_APPEXECLINK)
self.assertTrue(os.path.isfile(alias))
# testing the first one we see is sufficient
break
else:

View File

@ -0,0 +1,3 @@
The functions ``os.path.isdir``, ``os.path.isfile``, ``os.path.islink`` and
``os.path.exists`` are now 13% to 28% faster on Windows, by making fewer Win32
API calls.

View File

@ -1794,6 +1794,242 @@ exit:
#endif /* defined(MS_WINDOWS) */
#if defined(MS_WINDOWS)

/* Docstring, method-table entry and argument-parsing wrapper for
   os._path_isdir. This is part of Argument Clinic generated output, so prefer
   regenerating it over editing the code by hand. */
PyDoc_STRVAR(os__path_isdir__doc__,
"_path_isdir($module, /, path)\n"
"--\n"
"\n"
"Return true if the pathname refers to an existing directory.");

#define OS__PATH_ISDIR_METHODDEF \
    {"_path_isdir", _PyCFunction_CAST(os__path_isdir), METH_FASTCALL|METH_KEYWORDS, os__path_isdir__doc__},

static PyObject *
os__path_isdir_impl(PyObject *module, PyObject *path);

static PyObject *
os__path_isdir(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
{
    PyObject *return_value = NULL;
    /* Core builds hand the parser a statically allocated tuple holding the
       single keyword name; every other build passes NULL for that tuple. */
    #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)

    #define NUM_KEYWORDS 1
    static struct {
        PyGC_Head _this_is_not_used;
        PyObject_VAR_HEAD
        PyObject *ob_item[NUM_KEYWORDS];
    } _kwtuple = {
        .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
        .ob_item = { &_Py_ID(path), },
    };
    #undef NUM_KEYWORDS
    #define KWTUPLE (&_kwtuple.ob_base.ob_base)

    #else // !Py_BUILD_CORE
    # define KWTUPLE NULL
    #endif // !Py_BUILD_CORE

    static const char * const _keywords[] = {"path", NULL};
    static _PyArg_Parser _parser = {
        .keywords = _keywords,
        .fname = "_path_isdir",
        .kwtuple = KWTUPLE,
    };
    #undef KWTUPLE
    PyObject *argsbuf[1];
    PyObject *path;

    /* Resolve the call's arguments against _parser; a NULL result makes the
       wrapper return NULL without calling the implementation. */
    args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf);
    if (!args) {
        goto exit;
    }
    path = args[0];
    /* The object is forwarded as-is; this wrapper performs no path conversion. */
    return_value = os__path_isdir_impl(module, path);

exit:
    return return_value;
}

#endif /* defined(MS_WINDOWS) */
#if defined(MS_WINDOWS)

/* Docstring, method-table entry and argument-parsing wrapper for
   os._path_isfile. This is part of Argument Clinic generated output, so prefer
   regenerating it over editing the code by hand. */
PyDoc_STRVAR(os__path_isfile__doc__,
"_path_isfile($module, /, path)\n"
"--\n"
"\n"
"Test whether a path is a regular file");

#define OS__PATH_ISFILE_METHODDEF \
    {"_path_isfile", _PyCFunction_CAST(os__path_isfile), METH_FASTCALL|METH_KEYWORDS, os__path_isfile__doc__},

static PyObject *
os__path_isfile_impl(PyObject *module, PyObject *path);

static PyObject *
os__path_isfile(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
{
    PyObject *return_value = NULL;
    /* Core builds hand the parser a statically allocated tuple holding the
       single keyword name; every other build passes NULL for that tuple. */
    #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)

    #define NUM_KEYWORDS 1
    static struct {
        PyGC_Head _this_is_not_used;
        PyObject_VAR_HEAD
        PyObject *ob_item[NUM_KEYWORDS];
    } _kwtuple = {
        .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
        .ob_item = { &_Py_ID(path), },
    };
    #undef NUM_KEYWORDS
    #define KWTUPLE (&_kwtuple.ob_base.ob_base)

    #else // !Py_BUILD_CORE
    # define KWTUPLE NULL
    #endif // !Py_BUILD_CORE

    static const char * const _keywords[] = {"path", NULL};
    static _PyArg_Parser _parser = {
        .keywords = _keywords,
        .fname = "_path_isfile",
        .kwtuple = KWTUPLE,
    };
    #undef KWTUPLE
    PyObject *argsbuf[1];
    PyObject *path;

    /* Resolve the call's arguments against _parser; a NULL result makes the
       wrapper return NULL without calling the implementation. */
    args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf);
    if (!args) {
        goto exit;
    }
    path = args[0];
    /* The object is forwarded as-is; this wrapper performs no path conversion. */
    return_value = os__path_isfile_impl(module, path);

exit:
    return return_value;
}

#endif /* defined(MS_WINDOWS) */
#if defined(MS_WINDOWS)

/* Docstring, method-table entry and argument-parsing wrapper for
   os._path_exists. This is part of Argument Clinic generated output, so prefer
   regenerating it over editing the code by hand. */
PyDoc_STRVAR(os__path_exists__doc__,
"_path_exists($module, /, path)\n"
"--\n"
"\n"
"Test whether a path exists.  Returns False for broken symbolic links");

#define OS__PATH_EXISTS_METHODDEF \
    {"_path_exists", _PyCFunction_CAST(os__path_exists), METH_FASTCALL|METH_KEYWORDS, os__path_exists__doc__},

static PyObject *
os__path_exists_impl(PyObject *module, PyObject *path);

static PyObject *
os__path_exists(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
{
    PyObject *return_value = NULL;
    /* Core builds hand the parser a statically allocated tuple holding the
       single keyword name; every other build passes NULL for that tuple. */
    #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)

    #define NUM_KEYWORDS 1
    static struct {
        PyGC_Head _this_is_not_used;
        PyObject_VAR_HEAD
        PyObject *ob_item[NUM_KEYWORDS];
    } _kwtuple = {
        .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
        .ob_item = { &_Py_ID(path), },
    };
    #undef NUM_KEYWORDS
    #define KWTUPLE (&_kwtuple.ob_base.ob_base)

    #else // !Py_BUILD_CORE
    # define KWTUPLE NULL
    #endif // !Py_BUILD_CORE

    static const char * const _keywords[] = {"path", NULL};
    static _PyArg_Parser _parser = {
        .keywords = _keywords,
        .fname = "_path_exists",
        .kwtuple = KWTUPLE,
    };
    #undef KWTUPLE
    PyObject *argsbuf[1];
    PyObject *path;

    /* Resolve the call's arguments against _parser; a NULL result makes the
       wrapper return NULL without calling the implementation. */
    args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf);
    if (!args) {
        goto exit;
    }
    path = args[0];
    /* The object is forwarded as-is; this wrapper performs no path conversion. */
    return_value = os__path_exists_impl(module, path);

exit:
    return return_value;
}

#endif /* defined(MS_WINDOWS) */
#if defined(MS_WINDOWS)

/* Docstring, method-table entry and argument-parsing wrapper for
   os._path_islink. This is part of Argument Clinic generated output, so prefer
   regenerating it over editing the code by hand. */
PyDoc_STRVAR(os__path_islink__doc__,
"_path_islink($module, /, path)\n"
"--\n"
"\n"
"Test whether a path is a symbolic link");

#define OS__PATH_ISLINK_METHODDEF \
    {"_path_islink", _PyCFunction_CAST(os__path_islink), METH_FASTCALL|METH_KEYWORDS, os__path_islink__doc__},

static PyObject *
os__path_islink_impl(PyObject *module, PyObject *path);

static PyObject *
os__path_islink(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
{
    PyObject *return_value = NULL;
    /* Core builds hand the parser a statically allocated tuple holding the
       single keyword name; every other build passes NULL for that tuple. */
    #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)

    #define NUM_KEYWORDS 1
    static struct {
        PyGC_Head _this_is_not_used;
        PyObject_VAR_HEAD
        PyObject *ob_item[NUM_KEYWORDS];
    } _kwtuple = {
        .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
        .ob_item = { &_Py_ID(path), },
    };
    #undef NUM_KEYWORDS
    #define KWTUPLE (&_kwtuple.ob_base.ob_base)

    #else // !Py_BUILD_CORE
    # define KWTUPLE NULL
    #endif // !Py_BUILD_CORE

    static const char * const _keywords[] = {"path", NULL};
    static _PyArg_Parser _parser = {
        .keywords = _keywords,
        .fname = "_path_islink",
        .kwtuple = KWTUPLE,
    };
    #undef KWTUPLE
    PyObject *argsbuf[1];
    PyObject *path;

    /* Resolve the call's arguments against _parser; a NULL result makes the
       wrapper return NULL without calling the implementation. */
    args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf);
    if (!args) {
        goto exit;
    }
    path = args[0];
    /* The object is forwarded as-is; this wrapper performs no path conversion. */
    return_value = os__path_islink_impl(module, path);

exit:
    return return_value;
}

#endif /* defined(MS_WINDOWS) */
PyDoc_STRVAR(os__path_normpath__doc__,
"_path_normpath($module, /, path)\n"
"--\n"
@ -11041,6 +11277,22 @@ exit:
#define OS__PATH_SPLITROOT_METHODDEF
#endif /* !defined(OS__PATH_SPLITROOT_METHODDEF) */
/* Each macro below expands to nothing unless it was already defined earlier
   (the real definitions sit inside MS_WINDOWS guards), which lets the method
   table name all four entries unconditionally. */
#ifndef OS__PATH_ISDIR_METHODDEF
    #define OS__PATH_ISDIR_METHODDEF
#endif /* !defined(OS__PATH_ISDIR_METHODDEF) */

#ifndef OS__PATH_ISFILE_METHODDEF
    #define OS__PATH_ISFILE_METHODDEF
#endif /* !defined(OS__PATH_ISFILE_METHODDEF) */

#ifndef OS__PATH_EXISTS_METHODDEF
    #define OS__PATH_EXISTS_METHODDEF
#endif /* !defined(OS__PATH_EXISTS_METHODDEF) */

#ifndef OS__PATH_ISLINK_METHODDEF
    #define OS__PATH_ISLINK_METHODDEF
#endif /* !defined(OS__PATH_ISLINK_METHODDEF) */
#ifndef OS_NICE_METHODDEF
#define OS_NICE_METHODDEF
#endif /* !defined(OS_NICE_METHODDEF) */
@ -11560,4 +11812,4 @@ exit:
#ifndef OS_WAITSTATUS_TO_EXITCODE_METHODDEF
#define OS_WAITSTATUS_TO_EXITCODE_METHODDEF
#endif /* !defined(OS_WAITSTATUS_TO_EXITCODE_METHODDEF) */
/*[clinic end generated code: output=41eab6c3523792a9 input=a9049054013a1b77]*/
/*[clinic end generated code: output=a3f76228b549e8ec input=a9049054013a1b77]*/

View File

@ -4490,6 +4490,311 @@ os__path_splitroot_impl(PyObject *module, path_t *path)
}
/*[clinic input]
os._path_isdir

    path: 'O'

Return true if the pathname refers to an existing directory.

[clinic start generated code]*/

static PyObject *
os__path_isdir_impl(PyObject *module, PyObject *path)
/*[clinic end generated code: output=00faea0af309669d input=b1d2571cf7291aaf]*/
{
    /* Returns True when `path` (a path-like object or an open file
       descriptor) refers to a directory, False otherwise.  A ValueError
       raised while converting `path` is swallowed and reported as False;
       any other conversion error is propagated (NULL is returned). */
    HANDLE hfile;
    BOOL close_file = TRUE;
    FILE_BASIC_INFO info;
    path_t _path = PATH_T_INITIALIZE("isdir", "path", 0, 1);
    int result = 0;

    if (!path_converter(path, &_path)) {
        path_cleanup(&_path);
        if (PyErr_ExceptionMatches(PyExc_ValueError)) {
            PyErr_Clear();
            Py_RETURN_FALSE;
        }
        return NULL;
    }

    Py_BEGIN_ALLOW_THREADS
    if (_path.fd != -1) {
        /* The handle belongs to the caller's descriptor: never close it. */
        hfile = _Py_get_osfhandle_noraise(_path.fd);
        close_file = FALSE;
    }
    else {
        /* FILE_FLAG_BACKUP_SEMANTICS is what allows CreateFileW to open a
           directory. */
        hfile = CreateFileW(_path.wide, FILE_READ_ATTRIBUTES, 0, NULL,
                            OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, NULL);
    }
    if (hfile != INVALID_HANDLE_VALUE) {
        if (GetFileInformationByHandleEx(hfile, FileBasicInfo, &info,
                                         sizeof(info)))
        {
            result = (info.FileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0;
        }
        if (close_file) {
            CloseHandle(hfile);
        }
    }
    else if (_path.wide != NULL) {
        /* Only a failed CreateFileW() reaches this branch.  When a file
           descriptor was passed there is no path to stat and the thread's
           last-error value was not set by this function, so the answer
           stays False. */
        STRUCT_STAT st;
        switch (GetLastError()) {
        /* These errors do not show that the path is missing, so defer to
           STAT() for the answer. */
        case ERROR_ACCESS_DENIED:
        case ERROR_SHARING_VIOLATION:
        case ERROR_CANT_ACCESS_FILE:
        case ERROR_INVALID_PARAMETER:
            if (STAT(_path.wide, &st) == 0) {
                result = S_ISDIR(st.st_mode);
            }
            break;
        default:
            break;
        }
    }
    Py_END_ALLOW_THREADS

    path_cleanup(&_path);
    if (result) {
        Py_RETURN_TRUE;
    }
    Py_RETURN_FALSE;
}
/*[clinic input]
os._path_isfile

    path: 'O'

Test whether a path is a regular file

[clinic start generated code]*/

static PyObject *
os__path_isfile_impl(PyObject *module, PyObject *path)
/*[clinic end generated code: output=2394ed7c4b5cfd85 input=de22d74960ade365]*/
{
    /* Returns True when `path` (a path-like object or an open file
       descriptor) can be queried and is not a directory, False otherwise.
       A ValueError raised while converting `path` is swallowed and reported
       as False; any other conversion error is propagated (NULL is
       returned). */
    HANDLE hfile;
    BOOL close_file = TRUE;
    FILE_BASIC_INFO info;
    path_t _path = PATH_T_INITIALIZE("isfile", "path", 0, 1);
    int result = 0;

    if (!path_converter(path, &_path)) {
        path_cleanup(&_path);
        if (PyErr_ExceptionMatches(PyExc_ValueError)) {
            PyErr_Clear();
            Py_RETURN_FALSE;
        }
        return NULL;
    }

    Py_BEGIN_ALLOW_THREADS
    if (_path.fd != -1) {
        /* The handle belongs to the caller's descriptor: never close it. */
        hfile = _Py_get_osfhandle_noraise(_path.fd);
        close_file = FALSE;
    }
    else {
        hfile = CreateFileW(_path.wide, FILE_READ_ATTRIBUTES, 0, NULL,
                            OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, NULL);
    }
    if (hfile != INVALID_HANDLE_VALUE) {
        /* If the basic-info query fails the result stays False. */
        if (GetFileInformationByHandleEx(hfile, FileBasicInfo, &info,
                                         sizeof(info)))
        {
            result = !(info.FileAttributes & FILE_ATTRIBUTE_DIRECTORY);
        }
        if (close_file) {
            CloseHandle(hfile);
        }
    }
    else if (_path.wide != NULL) {
        /* Only a failed CreateFileW() reaches this branch.  When a file
           descriptor was passed there is no path to stat and the thread's
           last-error value was not set by this function, so the answer
           stays False. */
        STRUCT_STAT st;
        switch (GetLastError()) {
        /* These errors do not show that the path is missing, so defer to
           STAT() for the answer. */
        case ERROR_ACCESS_DENIED:
        case ERROR_SHARING_VIOLATION:
        case ERROR_CANT_ACCESS_FILE:
        case ERROR_INVALID_PARAMETER:
            if (STAT(_path.wide, &st) == 0) {
                result = S_ISREG(st.st_mode);
            }
            break;
        default:
            break;
        }
    }
    Py_END_ALLOW_THREADS

    path_cleanup(&_path);
    if (result) {
        Py_RETURN_TRUE;
    }
    Py_RETURN_FALSE;
}
/*[clinic input]
os._path_exists

    path: 'O'

Test whether a path exists.  Returns False for broken symbolic links

[clinic start generated code]*/

static PyObject *
os__path_exists_impl(PyObject *module, PyObject *path)
/*[clinic end generated code: output=f508c3b35e13a249 input=380f77cdfa0f7ae8]*/
{
    /* Returns True when `path` (a path-like object or an open file
       descriptor) can be opened for attribute access, or when STAT()
       succeeds after one of the listed open errors; False otherwise.  A
       ValueError raised while converting `path` is swallowed and reported
       as False; any other conversion error is propagated (NULL is
       returned). */
    HANDLE hfile;
    BOOL close_file = TRUE;
    path_t _path = PATH_T_INITIALIZE("exists", "path", 0, 1);
    int result = 0;

    if (!path_converter(path, &_path)) {
        path_cleanup(&_path);
        if (PyErr_ExceptionMatches(PyExc_ValueError)) {
            PyErr_Clear();
            Py_RETURN_FALSE;
        }
        return NULL;
    }

    Py_BEGIN_ALLOW_THREADS
    if (_path.fd != -1) {
        /* The handle belongs to the caller's descriptor: never close it. */
        hfile = _Py_get_osfhandle_noraise(_path.fd);
        close_file = FALSE;
    }
    else {
        hfile = CreateFileW(_path.wide, FILE_READ_ATTRIBUTES, 0, NULL,
                            OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, NULL);
    }
    if (hfile != INVALID_HANDLE_VALUE) {
        /* Obtaining a handle is sufficient proof of existence. */
        result = 1;
        if (close_file) {
            CloseHandle(hfile);
        }
    }
    else if (_path.wide != NULL) {
        /* Only a failed CreateFileW() reaches this branch.  When a file
           descriptor was passed there is no path to stat and the thread's
           last-error value was not set by this function, so the answer
           stays False. */
        STRUCT_STAT st;
        switch (GetLastError()) {
        /* These errors do not show that the path is missing, so defer to
           STAT() for the answer. */
        case ERROR_ACCESS_DENIED:
        case ERROR_SHARING_VIOLATION:
        case ERROR_CANT_ACCESS_FILE:
        case ERROR_INVALID_PARAMETER:
            if (STAT(_path.wide, &st) == 0) {
                result = 1;
            }
            break;
        default:
            break;
        }
    }
    Py_END_ALLOW_THREADS

    path_cleanup(&_path);
    if (result) {
        Py_RETURN_TRUE;
    }
    Py_RETURN_FALSE;
}
/*[clinic input]
os._path_islink

    path: 'O'

Test whether a path is a symbolic link

[clinic start generated code]*/

static PyObject *
os__path_islink_impl(PyObject *module, PyObject *path)
/*[clinic end generated code: output=6d8640b1a390c054 input=38a3cb937ccf59bf]*/
{
    /* Returns True when `path` (a path-like object or an open file
       descriptor) is a reparse point tagged IO_REPARSE_TAG_SYMLINK, False
       otherwise.  A ValueError raised while converting `path` is swallowed
       and reported as False; any other conversion error is propagated (NULL
       is returned). */
    HANDLE hfile;
    BOOL close_file = TRUE;
    FILE_ATTRIBUTE_TAG_INFO info;
    path_t _path = PATH_T_INITIALIZE("islink", "path", 0, 1);
    int result = 0;

    if (!path_converter(path, &_path)) {
        path_cleanup(&_path);
        if (PyErr_ExceptionMatches(PyExc_ValueError)) {
            PyErr_Clear();
            Py_RETURN_FALSE;
        }
        return NULL;
    }

    Py_BEGIN_ALLOW_THREADS
    if (_path.fd != -1) {
        /* The handle belongs to the caller's descriptor: never close it. */
        hfile = _Py_get_osfhandle_noraise(_path.fd);
        close_file = FALSE;
    }
    else {
        /* FILE_FLAG_OPEN_REPARSE_POINT opens the reparse point itself
           instead of following it to its target. */
        hfile = CreateFileW(_path.wide, FILE_READ_ATTRIBUTES, 0, NULL,
                            OPEN_EXISTING,
                            FILE_FLAG_OPEN_REPARSE_POINT | FILE_FLAG_BACKUP_SEMANTICS,
                            NULL);
    }
    if (hfile != INVALID_HANDLE_VALUE) {
        if (GetFileInformationByHandleEx(hfile, FileAttributeTagInfo, &info,
                                         sizeof(info)))
        {
            /* ReparseTag is only meaningful when the reparse-point
               attribute is set, so test the attribute before the tag. */
            result = (info.FileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) &&
                     (info.ReparseTag == IO_REPARSE_TAG_SYMLINK);
        }
        if (close_file) {
            CloseHandle(hfile);
        }
    }
    else if (_path.wide != NULL) {
        /* Only a failed CreateFileW() reaches this branch.  When a file
           descriptor was passed there is no path to stat and the thread's
           last-error value was not set by this function, so the answer
           stays False. */
        STRUCT_STAT st;
        switch (GetLastError()) {
        /* These errors do not show that the path is missing, so defer to
           LSTAT() for the answer. */
        case ERROR_ACCESS_DENIED:
        case ERROR_SHARING_VIOLATION:
        case ERROR_CANT_ACCESS_FILE:
        case ERROR_INVALID_PARAMETER:
            if (LSTAT(_path.wide, &st) == 0) {
                result = S_ISLNK(st.st_mode);
            }
            break;
        default:
            break;
        }
    }
    Py_END_ALLOW_THREADS

    path_cleanup(&_path);
    if (result) {
        Py_RETURN_TRUE;
    }
    Py_RETURN_FALSE;
}
#endif /* MS_WINDOWS */
@ -15150,6 +15455,11 @@ static PyMethodDef posix_methods[] = {
OS_WAITSTATUS_TO_EXITCODE_METHODDEF
OS_SETNS_METHODDEF
OS_UNSHARE_METHODDEF
OS__PATH_ISDIR_METHODDEF
OS__PATH_ISFILE_METHODDEF
OS__PATH_ISLINK_METHODDEF
OS__PATH_EXISTS_METHODDEF
{NULL, NULL} /* Sentinel */
};