mirror of https://github.com/python/cpython
GH-114847: Speed up `posixpath.realpath()` (#114848)
Apply the following optimizations to `posixpath.realpath()`:

- Remove use of recursion
- Construct child paths directly rather than using `join()`
- Use `os.getcwd[b]()` rather than `abspath()`
- Use `startswith(sep)` rather than `isabs()`
- Use slicing rather than `split()`

Co-authored-by: Petr Viktorin <encukou@gmail.com>
This commit is contained in:
parent
9ceaee74db
commit
abfa16b44b
|
@ -403,55 +403,66 @@ def realpath(filename, *, strict=False):
|
||||||
"""Return the canonical path of the specified filename, eliminating any
|
"""Return the canonical path of the specified filename, eliminating any
|
||||||
symbolic links encountered in the path."""
|
symbolic links encountered in the path."""
|
||||||
filename = os.fspath(filename)
|
filename = os.fspath(filename)
|
||||||
path, ok = _joinrealpath(filename[:0], filename, strict, {})
|
if isinstance(filename, bytes):
|
||||||
return abspath(path)
|
|
||||||
|
|
||||||
# Join two paths, normalizing and eliminating any symbolic links
|
|
||||||
# encountered in the second path.
|
|
||||||
# Two leading slashes are replaced by a single slash.
|
|
||||||
def _joinrealpath(path, rest, strict, seen):
|
|
||||||
if isinstance(path, bytes):
|
|
||||||
sep = b'/'
|
sep = b'/'
|
||||||
curdir = b'.'
|
curdir = b'.'
|
||||||
pardir = b'..'
|
pardir = b'..'
|
||||||
|
getcwd = os.getcwdb
|
||||||
else:
|
else:
|
||||||
sep = '/'
|
sep = '/'
|
||||||
curdir = '.'
|
curdir = '.'
|
||||||
pardir = '..'
|
pardir = '..'
|
||||||
|
getcwd = os.getcwd
|
||||||
|
|
||||||
if rest.startswith(sep):
|
# The stack of unresolved path parts. When popped, a special value of None
|
||||||
rest = rest[1:]
|
# indicates that a symlink target has been resolved, and that the original
|
||||||
path = sep
|
# symlink path can be retrieved by popping again. The [::-1] slice is a
|
||||||
|
# very fast way of spelling list(reversed(...)).
|
||||||
|
rest = filename.split(sep)[::-1]
|
||||||
|
|
||||||
|
# The resolved path, which is absolute throughout this function.
|
||||||
|
# Note: getcwd() returns a normalized and symlink-free path.
|
||||||
|
path = sep if filename.startswith(sep) else getcwd()
|
||||||
|
|
||||||
|
# Mapping from symlink paths to *fully resolved* symlink targets. If a
|
||||||
|
# symlink is encountered but not yet resolved, the value is None. This is
|
||||||
|
# used both to detect symlink loops and to speed up repeated traversals of
|
||||||
|
# the same links.
|
||||||
|
seen = {}
|
||||||
|
|
||||||
|
# Whether we're calling lstat() and readlink() to resolve symlinks. If we
|
||||||
|
# encounter an OSError for a symlink loop in non-strict mode, this is
|
||||||
|
# switched off.
|
||||||
|
querying = True
|
||||||
|
|
||||||
while rest:
|
while rest:
|
||||||
name, _, rest = rest.partition(sep)
|
name = rest.pop()
|
||||||
|
if name is None:
|
||||||
|
# resolved symlink target
|
||||||
|
seen[rest.pop()] = path
|
||||||
|
continue
|
||||||
if not name or name == curdir:
|
if not name or name == curdir:
|
||||||
# current dir
|
# current dir
|
||||||
continue
|
continue
|
||||||
if name == pardir:
|
if name == pardir:
|
||||||
# parent dir
|
# parent dir
|
||||||
if path:
|
path = path[:path.rindex(sep)] or sep
|
||||||
parent, name = split(path)
|
continue
|
||||||
if name == pardir:
|
if path == sep:
|
||||||
# ../..
|
newpath = path + name
|
||||||
path = join(path, pardir)
|
else:
|
||||||
else:
|
newpath = path + sep + name
|
||||||
# foo/bar/.. -> foo
|
if not querying:
|
||||||
path = parent
|
path = newpath
|
||||||
else:
|
|
||||||
# ..
|
|
||||||
path = pardir
|
|
||||||
continue
|
continue
|
||||||
newpath = join(path, name)
|
|
||||||
try:
|
try:
|
||||||
st = os.lstat(newpath)
|
st = os.lstat(newpath)
|
||||||
|
if not stat.S_ISLNK(st.st_mode):
|
||||||
|
path = newpath
|
||||||
|
continue
|
||||||
except OSError:
|
except OSError:
|
||||||
if strict:
|
if strict:
|
||||||
raise
|
raise
|
||||||
is_link = False
|
|
||||||
else:
|
|
||||||
is_link = stat.S_ISLNK(st.st_mode)
|
|
||||||
if not is_link:
|
|
||||||
path = newpath
|
path = newpath
|
||||||
continue
|
continue
|
||||||
# Resolve the symbolic link
|
# Resolve the symbolic link
|
||||||
|
@ -467,14 +478,23 @@ def _joinrealpath(path, rest, strict, seen):
|
||||||
os.stat(newpath)
|
os.stat(newpath)
|
||||||
else:
|
else:
|
||||||
# Return already resolved part + rest of the path unchanged.
|
# Return already resolved part + rest of the path unchanged.
|
||||||
return join(newpath, rest), False
|
path = newpath
|
||||||
|
querying = False
|
||||||
|
continue
|
||||||
seen[newpath] = None # not resolved symlink
|
seen[newpath] = None # not resolved symlink
|
||||||
path, ok = _joinrealpath(path, os.readlink(newpath), strict, seen)
|
target = os.readlink(newpath)
|
||||||
if not ok:
|
if target.startswith(sep):
|
||||||
return join(path, rest), False
|
# Symlink target is absolute; reset resolved path.
|
||||||
seen[newpath] = path # resolved symlink
|
path = sep
|
||||||
|
# Push the symlink path onto the stack, and signal its specialness by
|
||||||
|
# also pushing None. When these entries are popped, we'll record the
|
||||||
|
# fully-resolved symlink target in the 'seen' mapping.
|
||||||
|
rest.append(newpath)
|
||||||
|
rest.append(None)
|
||||||
|
# Push the unresolved symlink target parts onto the stack.
|
||||||
|
rest.extend(target.split(sep)[::-1])
|
||||||
|
|
||||||
return path, True
|
return path
|
||||||
|
|
||||||
|
|
||||||
supports_unicode_filenames = (sys.platform == 'darwin')
|
supports_unicode_filenames = (sys.platform == 'darwin')
|
||||||
|
|
|
@ -456,6 +456,15 @@ class PosixPathTest(unittest.TestCase):
|
||||||
finally:
|
finally:
|
||||||
os_helper.unlink(ABSTFN)
|
os_helper.unlink(ABSTFN)
|
||||||
|
|
||||||
|
@os_helper.skip_unless_symlink
|
||||||
|
@skip_if_ABSTFN_contains_backslash
|
||||||
|
def test_realpath_missing_pardir(self):
|
||||||
|
try:
|
||||||
|
os.symlink(os_helper.TESTFN + "1", os_helper.TESTFN)
|
||||||
|
self.assertEqual(realpath("nonexistent/../" + os_helper.TESTFN), ABSTFN + "1")
|
||||||
|
finally:
|
||||||
|
os_helper.unlink(os_helper.TESTFN)
|
||||||
|
|
||||||
@os_helper.skip_unless_symlink
|
@os_helper.skip_unless_symlink
|
||||||
@skip_if_ABSTFN_contains_backslash
|
@skip_if_ABSTFN_contains_backslash
|
||||||
def test_realpath_symlink_loops(self):
|
def test_realpath_symlink_loops(self):
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
Speed up :func:`os.path.realpath` on non-Windows platforms.
|
Loading…
Reference in New Issue