mirror of
https://github.com/python/cpython.git
synced 2024-11-24 10:24:35 +08:00
836f5433f7
`glob.glob()` currently calls itself recursively to build a list of matches of the dirname part of the pattern and then filters by the basename part. This is effectively BFS. ``glob.glob('*/*/*/*/*/foo')`` will build a huge list of all directories 5 levels deep even if only a handful of them contain a ``foo`` entry. A generator-based recursion implements DFS instead, so it never has to hold these lists in memory all at once. This patch converts the `glob` function to an `iglob` recursive generator. `glob()` now just returns ``list(iglob(pattern))``. I also cleaned up the code a bit (reduced duplicate `has_magic()` checks and created a second `glob0` helper func so that the main loop need not be duplicated). Thanks to Cherniavsky Beni for the patch!
75 lines
2.0 KiB
Python
75 lines
2.0 KiB
Python
"""Filename globbing utility."""
|
|
|
|
import os
|
|
import fnmatch
|
|
import re
|
|
|
|
# Names exported by ``from glob import *``: the list-returning ``glob`` and
# its generator counterpart ``iglob``.
__all__ = ["glob", "iglob"]
|
|
|
|
def glob(pathname):
    """Return a list of all paths that match the pattern *pathname*.

    The pattern may contain simple shell-style wildcards a la fnmatch.
    This is the eager counterpart of `iglob`: the generator is fully
    consumed and its results are collected into a list.

    """
    matches = list(iglob(pathname))
    return matches
|
|
|
|
def iglob(pathname):
    """Return an iterator yielding the paths that match a pathname pattern.

    The pattern may contain simple shell-style wildcards a la fnmatch.
    Matches are produced lazily, one at a time, so the full set of
    intermediate directories never has to be held in memory.

    """
    # A literal path (no wildcards) matches itself if, and only if, it exists.
    if not has_magic(pathname):
        if os.path.lexists(pathname):
            yield pathname
        return

    head, tail = os.path.split(pathname)

    # No directory part: match the pattern against the current directory and
    # yield bare names (without any directory prefix).
    if not head:
        for match in glob1(os.curdir, tail):
            yield match
        return

    # Expand the directory part recursively only when it contains wildcards;
    # otherwise it is the single literal directory to search.
    parents = iglob(head) if has_magic(head) else [head]
    # A wildcard basename needs a directory listing; a literal one only needs
    # an existence check.
    list_matches = glob1 if has_magic(tail) else glob0

    for parent in parents:
        for match in list_matches(parent, tail):
            yield os.path.join(parent, match)
|
|
|
|
# These 2 helper functions non-recursively glob inside a literal directory.
|
|
# They return a list of basenames. `glob1` accepts a pattern while `glob0`
|
|
# takes a literal basename (so it only has to check for its existence).
|
|
|
|
def glob1(dirname, pattern):
    """Return the basenames in *dirname* that match the wildcard *pattern*.

    *dirname* is a literal directory; an empty value means the current
    directory.  If the directory cannot be listed (it does not exist, is not
    a directory, permission is denied, ...) an empty list is returned.

    Names starting with a dot are treated as hidden and are only returned
    when *pattern* itself starts with a dot.  An empty *pattern* matches
    nothing and yields an empty list.
    """
    if not dirname:
        dirname = os.curdir
    try:
        names = os.listdir(dirname)
    except os.error:
        return []
    # Slicing instead of indexing so that an empty pattern does not raise
    # IndexError; it simply falls through and matches nothing.
    if pattern[:1] != '.':
        names = [name for name in names if name[:1] != '.']
    return fnmatch.filter(names, pattern)
|
|
|
|
def glob0(dirname, basename):
    """Return ``[basename]`` if the literal entry exists, else ``[]``.

    *basename* is a literal name (no wildcards), so no directory listing is
    needed: only the existence of ``dirname/basename`` is checked.  Broken
    symbolic links count as existing because `os.path.lexists` is used.

    An empty *basename* stands for a pattern that ended with a directory
    separator; in that case the result is ``['']`` only when *dirname* is a
    directory.
    """
    if basename == '':
        # `os.path.split()` returns an empty basename for paths ending with a
        # directory separator.  'q*x/' should match only directories.
        if os.path.isdir(dirname):
            return [basename]
    elif os.path.lexists(os.path.join(dirname, basename)):
        return [basename]
    return []
|
|
|
|
|
|
# Matches any one of the characters that start a wildcard construct:
# '*', '?' or '['.
magic_check = re.compile('[*?[]')


def has_magic(s):
    """Return True if *s* contains a wildcard character ('*', '?' or '[')."""
    found = magic_check.search(s)
    return bool(found)
|