bpo-37578: glob.glob -- added include_hidden parameter (GH-30153)

Automerge-Triggered-By: GH:asvetlov
This commit is contained in:
andrei kulakov 2021-12-18 09:23:34 -05:00 committed by GitHub
parent 6f2df42951
commit ae36cd1e79
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 68 additions and 23 deletions

View File

@ -36,7 +36,8 @@ For example, ``'[?]'`` matches the character ``'?'``.
The :mod:`pathlib` module offers high-level path objects. The :mod:`pathlib` module offers high-level path objects.
.. function:: glob(pathname, *, root_dir=None, dir_fd=None, recursive=False) .. function:: glob(pathname, *, root_dir=None, dir_fd=None, recursive=False, \
include_hidden=False)
Return a possibly-empty list of path names that match *pathname*, which must be Return a possibly-empty list of path names that match *pathname*, which must be
a string containing a path specification. *pathname* can be either absolute a string containing a path specification. *pathname* can be either absolute
@ -64,6 +65,8 @@ For example, ``'[?]'`` matches the character ``'?'``.
pattern is followed by an :data:`os.sep` or :data:`os.altsep` then files will not pattern is followed by an :data:`os.sep` or :data:`os.altsep` then files will not
match. match.
If *include_hidden* is true, the "``**``" pattern will match hidden directories.
.. audit-event:: glob.glob pathname,recursive glob.glob .. audit-event:: glob.glob pathname,recursive glob.glob
.. audit-event:: glob.glob/2 pathname,recursive,root_dir,dir_fd glob.glob .. audit-event:: glob.glob/2 pathname,recursive,root_dir,dir_fd glob.glob
@ -77,8 +80,12 @@ For example, ``'[?]'`` matches the character ``'?'``.
.. versionchanged:: 3.10 .. versionchanged:: 3.10
Added the *root_dir* and *dir_fd* parameters. Added the *root_dir* and *dir_fd* parameters.
.. versionchanged:: 3.11
Added the *include_hidden* parameter.
.. function:: iglob(pathname, *, root_dir=None, dir_fd=None, recursive=False)
.. function:: iglob(pathname, *, root_dir=None, dir_fd=None, recursive=False, \
include_hidden=False)
Return an :term:`iterator` which yields the same values as :func:`glob` Return an :term:`iterator` which yields the same values as :func:`glob`
without actually storing them all simultaneously. without actually storing them all simultaneously.
@ -92,6 +99,9 @@ For example, ``'[?]'`` matches the character ``'?'``.
.. versionchanged:: 3.10 .. versionchanged:: 3.10
Added the *root_dir* and *dir_fd* parameters. Added the *root_dir* and *dir_fd* parameters.
.. versionchanged:: 3.11
Added the *include_hidden* parameter.
.. function:: escape(pathname) .. function:: escape(pathname)

View File

@ -10,20 +10,26 @@ import sys
__all__ = ["glob", "iglob", "escape"] __all__ = ["glob", "iglob", "escape"]
def glob(pathname, *, root_dir=None, dir_fd=None, recursive=False,
         include_hidden=False):
    """Return a list of paths matching a pathname pattern.

    The pattern may contain simple shell-style wildcards a la
    fnmatch. Unlike fnmatch, filenames starting with a
    dot are special cases that are not matched by '*' and '?'
    patterns by default.

    If `include_hidden` is true, the patterns '*', '?', '**' will also
    match hidden files and directories.

    If `recursive` is true, the pattern '**' will match any files and
    zero or more directories and subdirectories.

    All arguments are forwarded unchanged to iglob(); this function only
    collects the resulting iterator into a list.
    """
    return list(iglob(pathname, root_dir=root_dir, dir_fd=dir_fd,
                      recursive=recursive, include_hidden=include_hidden))
def iglob(pathname, *, root_dir=None, dir_fd=None, recursive=False): def iglob(pathname, *, root_dir=None, dir_fd=None, recursive=False,
include_hidden=False):
"""Return an iterator which yields the paths matching a pathname pattern. """Return an iterator which yields the paths matching a pathname pattern.
The pattern may contain simple shell-style wildcards a la The pattern may contain simple shell-style wildcards a la
@ -40,7 +46,8 @@ def iglob(pathname, *, root_dir=None, dir_fd=None, recursive=False):
root_dir = os.fspath(root_dir) root_dir = os.fspath(root_dir)
else: else:
root_dir = pathname[:0] root_dir = pathname[:0]
it = _iglob(pathname, root_dir, dir_fd, recursive, False) it = _iglob(pathname, root_dir, dir_fd, recursive, False,
include_hidden=include_hidden)
if not pathname or recursive and _isrecursive(pathname[:2]): if not pathname or recursive and _isrecursive(pathname[:2]):
try: try:
s = next(it) # skip empty string s = next(it) # skip empty string
@ -50,7 +57,8 @@ def iglob(pathname, *, root_dir=None, dir_fd=None, recursive=False):
pass pass
return it return it
def _iglob(pathname, root_dir, dir_fd, recursive, dironly): def _iglob(pathname, root_dir, dir_fd, recursive, dironly,
include_hidden=False):
dirname, basename = os.path.split(pathname) dirname, basename = os.path.split(pathname)
if not has_magic(pathname): if not has_magic(pathname):
assert not dironly assert not dironly
@ -64,15 +72,18 @@ def _iglob(pathname, root_dir, dir_fd, recursive, dironly):
return return
if not dirname: if not dirname:
if recursive and _isrecursive(basename): if recursive and _isrecursive(basename):
yield from _glob2(root_dir, basename, dir_fd, dironly) yield from _glob2(root_dir, basename, dir_fd, dironly,
include_hidden=include_hidden)
else: else:
yield from _glob1(root_dir, basename, dir_fd, dironly) yield from _glob1(root_dir, basename, dir_fd, dironly,
include_hidden=include_hidden)
return return
# `os.path.split()` returns the argument itself as a dirname if it is a # `os.path.split()` returns the argument itself as a dirname if it is a
# drive or UNC path. Prevent an infinite recursion if a drive or UNC path # drive or UNC path. Prevent an infinite recursion if a drive or UNC path
# contains magic characters (i.e. r'\\?\C:'). # contains magic characters (i.e. r'\\?\C:').
if dirname != pathname and has_magic(dirname): if dirname != pathname and has_magic(dirname):
dirs = _iglob(dirname, root_dir, dir_fd, recursive, True) dirs = _iglob(dirname, root_dir, dir_fd, recursive, True,
include_hidden=include_hidden)
else: else:
dirs = [dirname] dirs = [dirname]
if has_magic(basename): if has_magic(basename):
@ -83,20 +94,21 @@ def _iglob(pathname, root_dir, dir_fd, recursive, dironly):
else: else:
glob_in_dir = _glob0 glob_in_dir = _glob0
for dirname in dirs: for dirname in dirs:
for name in glob_in_dir(_join(root_dir, dirname), basename, dir_fd, dironly): for name in glob_in_dir(_join(root_dir, dirname), basename, dir_fd, dironly,
include_hidden=include_hidden):
yield os.path.join(dirname, name) yield os.path.join(dirname, name)
# These 2 helper functions non-recursively glob inside a literal directory. # These 2 helper functions non-recursively glob inside a literal directory.
# They return a list of basenames. _glob1 accepts a pattern while _glob0 # They return a list of basenames. _glob1 accepts a pattern while _glob0
# takes a literal basename (so it only has to check for its existence). # takes a literal basename (so it only has to check for its existence).
def _glob1(dirname, pattern, dir_fd, dironly, include_hidden=False):
    """Return the basenames listed for *dirname* that match *pattern*.

    Names for which ``_ishidden`` is true are removed before matching,
    unless *include_hidden* is true or *pattern* itself is hidden.
    The result is the list produced by ``fnmatch.filter``.
    """
    names = _listdir(dirname, dir_fd, dironly)
    # Filter only when hidden names really have to be excluded, so the
    # per-name predicate does not need to consult include_hidden at all.
    if not (include_hidden or _ishidden(pattern)):
        names = (x for x in names if not _ishidden(x))
    return fnmatch.filter(names, pattern)
def _glob0(dirname, basename, dir_fd, dironly): def _glob0(dirname, basename, dir_fd, dironly, include_hidden=False):
if basename: if basename:
if _lexists(_join(dirname, basename), dir_fd): if _lexists(_join(dirname, basename), dir_fd):
return [basename] return [basename]
@ -118,10 +130,11 @@ def glob1(dirname, pattern):
# This helper function recursively yields relative pathnames inside a literal # This helper function recursively yields relative pathnames inside a literal
# directory. # directory.
def _glob2(dirname, pattern, dir_fd, dironly, include_hidden=False):
    """Yield relative pathnames found recursively below *dirname*.

    *pattern* must satisfy ``_isrecursive``.  The first value produced is
    an empty slice of *pattern* (so it has the same type as *pattern*),
    followed by everything ``_rlistdir`` yields for *dirname*.
    """
    assert _isrecursive(pattern)
    empty_name = pattern[:0]
    yield empty_name
    nested = _rlistdir(dirname, dir_fd, dironly,
                       include_hidden=include_hidden)
    yield from nested
# If dironly is false, yields all file names inside a directory. # If dironly is false, yields all file names inside a directory.
# If dironly is true, yields only directory names. # If dironly is true, yields only directory names.
@ -164,13 +177,14 @@ def _listdir(dirname, dir_fd, dironly):
return list(it) return list(it)
# Recursively yields relative pathnames inside a literal directory. # Recursively yields relative pathnames inside a literal directory.
def _rlistdir(dirname, dir_fd, dironly, include_hidden=False):
    """Yield relative pathnames of everything below *dirname*, depth first.

    Each listed name is yielded, followed by the names found beneath it
    (joined onto that name), before moving on to the next sibling.  A
    hidden name is skipped together with everything under it unless
    *include_hidden* is true.
    """
    for entry in _listdir(dirname, dir_fd, dironly):
        if not include_hidden and _ishidden(entry):
            continue
        yield entry
        # Avoid joining onto an empty dirname so results stay relative.
        child_dir = _join(dirname, entry) if dirname else entry
        for below in _rlistdir(child_dir, dir_fd, dironly,
                               include_hidden=include_hidden):
            yield _join(entry, below)

View File

@ -30,6 +30,7 @@ class GlobTests(unittest.TestCase):
self.mktemp('aab', 'F') self.mktemp('aab', 'F')
self.mktemp('.aa', 'G') self.mktemp('.aa', 'G')
self.mktemp('.bb', 'H') self.mktemp('.bb', 'H')
self.mktemp('.bb', '.J')
self.mktemp('aaa', 'zzzF') self.mktemp('aaa', 'zzzF')
self.mktemp('ZZZ') self.mktemp('ZZZ')
self.mktemp('EF') self.mktemp('EF')
@ -56,7 +57,9 @@ class GlobTests(unittest.TestCase):
pattern = os.path.join(*parts) pattern = os.path.join(*parts)
p = os.path.join(self.tempdir, pattern) p = os.path.join(self.tempdir, pattern)
res = glob.glob(p, **kwargs) res = glob.glob(p, **kwargs)
res2 = glob.iglob(p, **kwargs)
self.assertCountEqual(glob.iglob(p, **kwargs), res) self.assertCountEqual(glob.iglob(p, **kwargs), res)
bres = [os.fsencode(x) for x in res] bres = [os.fsencode(x) for x in res]
self.assertCountEqual(glob.glob(os.fsencode(p), **kwargs), bres) self.assertCountEqual(glob.glob(os.fsencode(p), **kwargs), bres)
self.assertCountEqual(glob.iglob(os.fsencode(p), **kwargs), bres) self.assertCountEqual(glob.iglob(os.fsencode(p), **kwargs), bres)
@ -249,6 +252,17 @@ class GlobTests(unittest.TestCase):
def rglob(self, *parts, **kwargs):
    """Call ``self.glob`` on *parts* with ``recursive=True`` added."""
    return self.glob(*parts, **kwargs, recursive=True)
def hglob(self, *parts, **kwargs):
    """Call ``self.glob`` on *parts* with ``include_hidden=True`` added."""
    return self.glob(*parts, **kwargs, include_hidden=True)
def test_hidden_glob(self):
    """Check that '?', '*' and '**' match hidden names via hglob()."""
    check = self.assertSequencesEqual_noorder
    # Both single-component patterns must find the plain and the
    # dot-prefixed directory alike.
    plain_and_hidden = [('aaa',), ('.aa',)]
    for pattern in ('?aa', '*aa'):
        check(self.hglob(pattern), self.joins(*plain_and_hidden))
    # 'G' is expected only under the hidden '.aa' directory.
    hidden_child = [('.aa', 'G')]
    check(self.hglob('**', 'G'), self.joins(*hidden_child))
def test_recursive_glob(self): def test_recursive_glob(self):
eq = self.assertSequencesEqual_noorder eq = self.assertSequencesEqual_noorder
full = [('EF',), ('ZZZ',), full = [('EF',), ('ZZZ',),
@ -314,6 +328,10 @@ class GlobTests(unittest.TestCase):
expect += [join('sym3', 'EF')] expect += [join('sym3', 'EF')]
eq(glob.glob(join('**', 'EF'), recursive=True), expect) eq(glob.glob(join('**', 'EF'), recursive=True), expect)
rec = [('.bb','H'), ('.bb','.J'), ('.aa','G'), ('.aa',), ('.bb',)]
eq(glob.glob('**', recursive=True, include_hidden=True),
[join(*i) for i in full+rec])
def test_glob_many_open_files(self): def test_glob_many_open_files(self):
depth = 30 depth = 30
base = os.path.join(self.tempdir, 'deep') base = os.path.join(self.tempdir, 'deep')

View File

@ -0,0 +1,3 @@
Add the *include_hidden* parameter to :func:`~glob.glob` and :func:`~glob.iglob` to
match hidden files and directories when using special characters like ``*``,
``**``, ``?`` and ``[]``.