Issue #23605: Fix os.walk(topdown=True), don't cache entry.is_symlink() because

the caller can replace the directory with a different file kind.

The bottom-up way, os.walk(topdown=False), still uses entry.is_symlink(), and
so can be faster than Python 3.4.
This commit is contained in:
Victor Stinner 2015-03-18 11:29:47 +01:00
parent a47fc5c2dd
commit 7fea974baf

View File

@ -354,7 +354,6 @@ def walk(top, topdown=True, onerror=None, followlinks=False):
dirs = []
nondirs = []
symlinks = set()
# We may not have read permission for top, in which case we can't
# get a list of the files the directory contains. os.walk
@ -364,46 +363,69 @@ def walk(top, topdown=True, onerror=None, followlinks=False):
try:
# Note that scandir is global in this module due
# to earlier import-*.
for entry in scandir(top):
scandir_it = scandir(top)
except OSError as error:
if onerror is not None:
onerror(error)
return
while True:
try:
try:
is_dir = entry.is_dir()
except OSError:
# If is_dir() raises an OSError, consider that the entry is not
# a directory, same behaviour than os.path.isdir().
is_dir = False
entry = next(scandir_it)
except StopIteration:
break
except OSError as error:
if onerror is not None:
onerror(error)
return
if is_dir:
dirs.append(entry.name)
try:
is_dir = entry.is_dir()
except OSError:
# If is_dir() raises an OSError, consider that the entry is not
# a directory, same behaviour than os.path.isdir().
is_dir = False
if is_dir:
dirs.append(entry.name)
else:
nondirs.append(entry.name)
if not topdown and is_dir:
# Bottom-up: recurse into sub-directory, but exclude symlinks to
# directories if followlinks is False
if followlinks:
walk_into = True
else:
nondirs.append(entry.name)
if is_dir and not followlinks:
try:
if entry.is_symlink():
symlinks.add(entry.name)
is_symlink = entry.is_symlink()
except OSError:
# If is_symlink() raises an OSError, consider that the
# entry is not a symbolik link, same behaviour than
# os.path.islink().
pass
except OSError as error:
# scandir() or iterating into scandir() iterator raised an OSError
if onerror is not None:
onerror(error)
return
is_symlink = False
walk_into = not is_symlink
if walk_into:
yield from walk(entry.path, topdown, onerror, followlinks)
# Yield before recursion if going top down
if topdown:
yield top, dirs, nondirs
# Recurse into sub-directories
for name in dirs:
if followlinks or name not in symlinks:
new_path = path.join(top, name)
yield from walk(new_path, topdown, onerror, followlinks)
# Yield after recursion if going bottom up
if not topdown:
# Recurse into sub-directories
islink, join = path.islink, path.join
for name in dirs:
new_path = join(top, name)
# Issue #23605: os.path.islink() is used instead of caching
# entry.is_symlink() result during the loop on os.scandir() because
# the caller can replace the directory entry during the "yield"
# above.
if followlinks or not islink(new_path):
yield from walk(new_path, topdown, onerror, followlinks)
else:
# Yield after recursion if going bottom up
yield top, dirs, nondirs
__all__.append("walk")