GH-89727: Fix `os.fwalk()` recursion error on deep trees (#119638)

Implement `os.fwalk()` using a list as a stack to avoid emitting recursion
errors on deeply nested trees.
This commit is contained in:
Barney Gale 2024-05-30 04:05:36 +01:00 committed by GitHub
parent 2cc3502f98
commit 3c890b503c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 55 additions and 39 deletions

View File

@ -478,24 +478,52 @@ if {open, stat} <= supports_dir_fd and {scandir, stat} <= supports_fd:
"""
sys.audit("os.fwalk", top, topdown, onerror, follow_symlinks, dir_fd)
top = fspath(top)
# Note: To guard against symlink races, we use the standard
# lstat()/open()/fstat() trick.
if not follow_symlinks:
orig_st = stat(top, follow_symlinks=False, dir_fd=dir_fd)
topfd = open(top, O_RDONLY | O_NONBLOCK, dir_fd=dir_fd)
try:
if (follow_symlinks or (st.S_ISDIR(orig_st.st_mode) and
path.samestat(orig_st, stat(topfd)))):
yield from _fwalk(topfd, top, isinstance(top, bytes),
topdown, onerror, follow_symlinks)
finally:
close(topfd)
stack = [(_fwalk_walk, (True, dir_fd, top, top, None))]
isbytes = isinstance(top, bytes)
while stack:
yield from _fwalk(stack, isbytes, topdown, onerror, follow_symlinks)
def _fwalk(topfd, toppath, isbytes, topdown, onerror, follow_symlinks):
# Each item in the _fwalk() stack is a pair (action, args).
_fwalk_walk = 0 # args: (isroot, dirfd, toppath, topname, entry)
_fwalk_yield = 1 # args: (toppath, dirnames, filenames, topfd)
_fwalk_close = 2 # args: dirfd
def _fwalk(stack, isbytes, topdown, onerror, follow_symlinks):
# Note: This uses O(depth of the directory tree) file descriptors: if
# necessary, it can be adapted to only require O(1) FDs, see issue
# #13734.
action, value = stack.pop()
if action == _fwalk_close:
close(value)
return
elif action == _fwalk_yield:
yield value
return
assert action == _fwalk_walk
isroot, dirfd, toppath, topname, entry = value
try:
if not follow_symlinks:
# Note: To guard against symlink races, we use the standard
# lstat()/open()/fstat() trick.
if entry is None:
orig_st = stat(topname, follow_symlinks=False, dir_fd=dirfd)
else:
orig_st = entry.stat(follow_symlinks=False)
topfd = open(topname, O_RDONLY | O_NONBLOCK, dir_fd=dirfd)
except OSError as err:
if isroot:
raise
if onerror is not None:
onerror(err)
return
stack.append((_fwalk_close, topfd))
if not follow_symlinks:
if isroot and not st.S_ISDIR(orig_st.st_mode):
return
if not path.samestat(orig_st, stat(topfd)):
return
scandir_it = scandir(topfd)
dirs = []
nondirs = []
@ -521,31 +549,18 @@ if {open, stat} <= supports_dir_fd and {scandir, stat} <= supports_fd:
if topdown:
yield toppath, dirs, nondirs, topfd
else:
stack.append((_fwalk_yield, (toppath, dirs, nondirs, topfd)))
for name in dirs if entries is None else zip(dirs, entries):
try:
if not follow_symlinks:
if topdown:
orig_st = stat(name, dir_fd=topfd, follow_symlinks=False)
else:
assert entries is not None
name, entry = name
orig_st = entry.stat(follow_symlinks=False)
dirfd = open(name, O_RDONLY | O_NONBLOCK, dir_fd=topfd)
except OSError as err:
if onerror is not None:
onerror(err)
continue
try:
if follow_symlinks or path.samestat(orig_st, stat(dirfd)):
dirpath = path.join(toppath, name)
yield from _fwalk(dirfd, dirpath, isbytes,
topdown, onerror, follow_symlinks)
finally:
close(dirfd)
if not topdown:
yield toppath, dirs, nondirs, topfd
toppath = path.join(toppath, toppath[:0]) # Add trailing slash.
if entries is None:
stack.extend(
(_fwalk_walk, (False, topfd, toppath + name, name, None))
for name in dirs[::-1])
else:
stack.extend(
(_fwalk_walk, (False, topfd, toppath + name, name, entry))
for name, entry in zip(dirs[::-1], entries[::-1]))
__all__.append("fwalk")

View File

@ -1687,8 +1687,6 @@ class FwalkTests(WalkTests):
# fwalk() keeps file descriptors open
test_walk_many_open_files = None
# fwalk() still uses recursion
test_walk_above_recursion_limit = None
class BytesWalkTests(WalkTests):

View File

@ -0,0 +1,3 @@
Fix issue with :func:`os.fwalk` where a :exc:`RecursionError` was raised on
deep directory trees by adjusting the implementation to be iterative instead
of recursive.