mirror of https://github.com/python/cpython
GH-114847: Speed up `posixpath.realpath()` (#114848)
Apply the following optimizations to `posixpath.realpath()`:

- Remove use of recursion
- Construct child paths directly rather than using `join()`
- Use `os.getcwd[b]()` rather than `abspath()`
- Use `startswith(sep)` rather than `isabs()`
- Use slicing rather than `split()`

Co-authored-by: Petr Viktorin <encukou@gmail.com>
This commit is contained in:
parent
9ceaee74db
commit
abfa16b44b
|
@ -403,55 +403,66 @@ def realpath(filename, *, strict=False):
|
|||
"""Return the canonical path of the specified filename, eliminating any
|
||||
symbolic links encountered in the path."""
|
||||
filename = os.fspath(filename)
|
||||
path, ok = _joinrealpath(filename[:0], filename, strict, {})
|
||||
return abspath(path)
|
||||
|
||||
# Join two paths, normalizing and eliminating any symbolic links
|
||||
# encountered in the second path.
|
||||
# Two leading slashes are replaced by a single slash.
|
||||
def _joinrealpath(path, rest, strict, seen):
|
||||
if isinstance(path, bytes):
|
||||
if isinstance(filename, bytes):
|
||||
sep = b'/'
|
||||
curdir = b'.'
|
||||
pardir = b'..'
|
||||
getcwd = os.getcwdb
|
||||
else:
|
||||
sep = '/'
|
||||
curdir = '.'
|
||||
pardir = '..'
|
||||
getcwd = os.getcwd
|
||||
|
||||
if rest.startswith(sep):
|
||||
rest = rest[1:]
|
||||
path = sep
|
||||
# The stack of unresolved path parts. When popped, a special value of None
|
||||
# indicates that a symlink target has been resolved, and that the original
|
||||
# symlink path can be retrieved by popping again. The [::-1] slice is a
|
||||
# very fast way of spelling list(reversed(...)).
|
||||
rest = filename.split(sep)[::-1]
|
||||
|
||||
# The resolved path, which is absolute throughout this function.
|
||||
# Note: getcwd() returns a normalized and symlink-free path.
|
||||
path = sep if filename.startswith(sep) else getcwd()
|
||||
|
||||
# Mapping from symlink paths to *fully resolved* symlink targets. If a
|
||||
# symlink is encountered but not yet resolved, the value is None. This is
|
||||
# used both to detect symlink loops and to speed up repeated traversals of
|
||||
# the same links.
|
||||
seen = {}
|
||||
|
||||
# Whether we're calling lstat() and readlink() to resolve symlinks. If we
|
||||
# encounter an OSError for a symlink loop in non-strict mode, this is
|
||||
# switched off.
|
||||
querying = True
|
||||
|
||||
while rest:
|
||||
name, _, rest = rest.partition(sep)
|
||||
name = rest.pop()
|
||||
if name is None:
|
||||
# resolved symlink target
|
||||
seen[rest.pop()] = path
|
||||
continue
|
||||
if not name or name == curdir:
|
||||
# current dir
|
||||
continue
|
||||
if name == pardir:
|
||||
# parent dir
|
||||
if path:
|
||||
parent, name = split(path)
|
||||
if name == pardir:
|
||||
# ../..
|
||||
path = join(path, pardir)
|
||||
else:
|
||||
# foo/bar/.. -> foo
|
||||
path = parent
|
||||
else:
|
||||
# ..
|
||||
path = pardir
|
||||
path = path[:path.rindex(sep)] or sep
|
||||
continue
|
||||
if path == sep:
|
||||
newpath = path + name
|
||||
else:
|
||||
newpath = path + sep + name
|
||||
if not querying:
|
||||
path = newpath
|
||||
continue
|
||||
newpath = join(path, name)
|
||||
try:
|
||||
st = os.lstat(newpath)
|
||||
if not stat.S_ISLNK(st.st_mode):
|
||||
path = newpath
|
||||
continue
|
||||
except OSError:
|
||||
if strict:
|
||||
raise
|
||||
is_link = False
|
||||
else:
|
||||
is_link = stat.S_ISLNK(st.st_mode)
|
||||
if not is_link:
|
||||
path = newpath
|
||||
continue
|
||||
# Resolve the symbolic link
|
||||
|
@ -467,14 +478,23 @@ def _joinrealpath(path, rest, strict, seen):
|
|||
os.stat(newpath)
|
||||
else:
|
||||
# Return already resolved part + rest of the path unchanged.
|
||||
return join(newpath, rest), False
|
||||
path = newpath
|
||||
querying = False
|
||||
continue
|
||||
seen[newpath] = None # not resolved symlink
|
||||
path, ok = _joinrealpath(path, os.readlink(newpath), strict, seen)
|
||||
if not ok:
|
||||
return join(path, rest), False
|
||||
seen[newpath] = path # resolved symlink
|
||||
target = os.readlink(newpath)
|
||||
if target.startswith(sep):
|
||||
# Symlink target is absolute; reset resolved path.
|
||||
path = sep
|
||||
# Push the symlink path onto the stack, and signal its specialness by
|
||||
# also pushing None. When these entries are popped, we'll record the
|
||||
# fully-resolved symlink target in the 'seen' mapping.
|
||||
rest.append(newpath)
|
||||
rest.append(None)
|
||||
# Push the unresolved symlink target parts onto the stack.
|
||||
rest.extend(target.split(sep)[::-1])
|
||||
|
||||
return path, True
|
||||
return path
|
||||
|
||||
|
||||
supports_unicode_filenames = (sys.platform == 'darwin')
|
||||
|
|
|
@ -456,6 +456,15 @@ class PosixPathTest(unittest.TestCase):
|
|||
finally:
|
||||
os_helper.unlink(ABSTFN)
|
||||
|
||||
@os_helper.skip_unless_symlink
|
||||
@skip_if_ABSTFN_contains_backslash
|
||||
def test_realpath_missing_pardir(self):
|
||||
try:
|
||||
os.symlink(os_helper.TESTFN + "1", os_helper.TESTFN)
|
||||
self.assertEqual(realpath("nonexistent/../" + os_helper.TESTFN), ABSTFN + "1")
|
||||
finally:
|
||||
os_helper.unlink(os_helper.TESTFN)
|
||||
|
||||
@os_helper.skip_unless_symlink
|
||||
@skip_if_ABSTFN_contains_backslash
|
||||
def test_realpath_symlink_loops(self):
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Speed up :func:`os.path.realpath` on non-Windows platforms.
|
Loading…
Reference in New Issue