GH-114847: Speed up `posixpath.realpath()` (#114848)

Apply the following optimizations to `posixpath.realpath()`:

- Remove use of recursion
- Construct child paths directly rather than using `join()`
- Use `os.getcwd[b]()` rather than `abspath()`
- Use `startswith(sep)` rather than `isabs()`
- Use slicing rather than `split()`

Co-authored-by: Petr Viktorin <encukou@gmail.com>
This commit is contained in:
Barney Gale 2024-04-05 13:35:01 +01:00 committed by GitHub
parent 9ceaee74db
commit abfa16b44b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 64 additions and 34 deletions

View File

@ -403,55 +403,66 @@ def realpath(filename, *, strict=False):
"""Return the canonical path of the specified filename, eliminating any
symbolic links encountered in the path."""
filename = os.fspath(filename)
path, ok = _joinrealpath(filename[:0], filename, strict, {})
return abspath(path)
# Join two paths, normalizing and eliminating any symbolic links
# encountered in the second path.
# Two leading slashes are replaced by a single slash.
def _joinrealpath(path, rest, strict, seen):
if isinstance(path, bytes):
if isinstance(filename, bytes):
sep = b'/'
curdir = b'.'
pardir = b'..'
getcwd = os.getcwdb
else:
sep = '/'
curdir = '.'
pardir = '..'
getcwd = os.getcwd
if rest.startswith(sep):
rest = rest[1:]
path = sep
# The stack of unresolved path parts. When popped, a special value of None
# indicates that a symlink target has been resolved, and that the original
# symlink path can be retrieved by popping again. The [::-1] slice is a
# very fast way of spelling list(reversed(...)).
rest = filename.split(sep)[::-1]
# The resolved path, which is absolute throughout this function.
# Note: getcwd() returns a normalized and symlink-free path.
path = sep if filename.startswith(sep) else getcwd()
# Mapping from symlink paths to *fully resolved* symlink targets. If a
# symlink is encountered but not yet resolved, the value is None. This is
# used both to detect symlink loops and to speed up repeated traversals of
# the same links.
seen = {}
# Whether we're calling lstat() and readlink() to resolve symlinks. If we
# encounter an OSError for a symlink loop in non-strict mode, this is
# switched off.
querying = True
while rest:
name, _, rest = rest.partition(sep)
name = rest.pop()
if name is None:
# resolved symlink target
seen[rest.pop()] = path
continue
if not name or name == curdir:
# current dir
continue
if name == pardir:
# parent dir
if path:
parent, name = split(path)
if name == pardir:
# ../..
path = join(path, pardir)
else:
# foo/bar/.. -> foo
path = parent
else:
# ..
path = pardir
path = path[:path.rindex(sep)] or sep
continue
if path == sep:
newpath = path + name
else:
newpath = path + sep + name
if not querying:
path = newpath
continue
newpath = join(path, name)
try:
st = os.lstat(newpath)
if not stat.S_ISLNK(st.st_mode):
path = newpath
continue
except OSError:
if strict:
raise
is_link = False
else:
is_link = stat.S_ISLNK(st.st_mode)
if not is_link:
path = newpath
continue
# Resolve the symbolic link
@ -467,14 +478,23 @@ def _joinrealpath(path, rest, strict, seen):
os.stat(newpath)
else:
# Return already resolved part + rest of the path unchanged.
return join(newpath, rest), False
path = newpath
querying = False
continue
seen[newpath] = None # not resolved symlink
path, ok = _joinrealpath(path, os.readlink(newpath), strict, seen)
if not ok:
return join(path, rest), False
seen[newpath] = path # resolved symlink
target = os.readlink(newpath)
if target.startswith(sep):
# Symlink target is absolute; reset resolved path.
path = sep
# Push the symlink path onto the stack, and signal its specialness by
# also pushing None. When these entries are popped, we'll record the
# fully-resolved symlink target in the 'seen' mapping.
rest.append(newpath)
rest.append(None)
# Push the unresolved symlink target parts onto the stack.
rest.extend(target.split(sep)[::-1])
return path, True
return path
supports_unicode_filenames = (sys.platform == 'darwin')

View File

@ -456,6 +456,15 @@ class PosixPathTest(unittest.TestCase):
finally:
os_helper.unlink(ABSTFN)
@os_helper.skip_unless_symlink
@skip_if_ABSTFN_contains_backslash
def test_realpath_missing_pardir(self):
try:
os.symlink(os_helper.TESTFN + "1", os_helper.TESTFN)
self.assertEqual(realpath("nonexistent/../" + os_helper.TESTFN), ABSTFN + "1")
finally:
os_helper.unlink(os_helper.TESTFN)
@os_helper.skip_unless_symlink
@skip_if_ABSTFN_contains_backslash
def test_realpath_symlink_loops(self):

View File

@ -0,0 +1 @@
Speed up :func:`os.path.realpath` on non-Windows platforms.