1995-01-27 10:41:45 +08:00
|
|
|
"""Filename matching with shell patterns.
|
1992-01-13 07:29:29 +08:00
|
|
|
|
1995-01-27 10:41:45 +08:00
|
|
|
fnmatch(FILENAME, PATTERN) matches according to the local convention.
|
|
|
|
fnmatchcase(FILENAME, PATTERN) always takes case into account.
|
1992-01-13 07:29:29 +08:00
|
|
|
|
1995-01-27 10:41:45 +08:00
|
|
|
The functions operate by translating the pattern into a regular
|
|
|
|
expression. They cache the compiled regular expressions for speed.
|
|
|
|
|
|
|
|
The function translate(PATTERN) returns a regular expression
|
|
|
|
corresponding to PATTERN. (It does not compile it.)
|
|
|
|
"""
|
2010-07-24 00:22:25 +08:00
|
|
|
import os
|
|
|
|
import posixpath
|
1997-10-23 05:00:49 +08:00
|
|
|
import re
|
2010-08-14 00:26:40 +08:00
|
|
|
import functools
|
1997-10-23 05:00:49 +08:00
|
|
|
|
2010-08-14 00:26:40 +08:00
|
|
|
__all__ = ["filter", "fnmatch", "fnmatchcase", "translate"]
|
2010-07-24 00:22:25 +08:00
|
|
|
|
1991-01-02 02:11:14 +08:00
|
|
|
def fnmatch(name, pat):
    """Return whether FILENAME matches the shell-style PATTERN.

    Supported wildcards:

    *       matches everything
    ?       matches any single character
    [seq]   matches any character in seq
    [!seq]  matches any char not in seq

    A leading period in FILENAME gets no special treatment.
    Both arguments are passed through os.path.normcase() before the
    comparison, so matching follows the case convention of the current
    platform.  Call fnmatchcase(FILENAME, PATTERN) directly to skip
    that normalization.
    """
    # Normalize the name first, then the pattern, and hand both to the
    # case-sensitive matcher.
    return fnmatchcase(os.path.normcase(name), os.path.normcase(pat))
|
1995-01-27 10:41:45 +08:00
|
|
|
|
2021-07-15 18:53:26 +08:00
|
|
|
@functools.lru_cache(maxsize=32768, typed=True)
def _compile_pattern(pat):
    """Return the bound ``match`` method of the regex compiled from PAT.

    PAT may be ``str`` or ``bytes``.  Results are memoized by the
    surrounding lru_cache (``typed=True`` keeps str and bytes patterns
    in separate cache entries).
    """
    if not isinstance(pat, bytes):
        return re.compile(translate(pat)).match
    # translate() is applied to text, so round-trip the bytes pattern
    # through ISO-8859-1 and turn the resulting regex back into bytes.
    regex_text = translate(str(pat, 'ISO-8859-1'))
    return re.compile(bytes(regex_text, 'ISO-8859-1')).match
|
2010-07-24 00:22:25 +08:00
|
|
|
|
2001-06-06 14:24:38 +08:00
|
|
|
def filter(names, pat):
    """Build a list of the items of the iterable NAMES that match PAT.

    PAT is case-normalized with os.path.normcase() before compilation.
    Each name is tested in its normalized form, but the original,
    un-normalized name is what ends up in the returned list.
    """
    matcher = _compile_pattern(os.path.normcase(pat))
    if os.path is posixpath:
        # normcase on posix is a NOP, so skip calling it for every name.
        return [entry for entry in names if matcher(entry)]
    return [entry for entry in names if matcher(os.path.normcase(entry))]
|
|
|
|
|
1995-01-27 10:41:45 +08:00
|
|
|
def fnmatchcase(name, pat):
    """Return whether FILENAME matches PATTERN, respecting case.

    Unlike fnmatch(), neither argument is case-normalized before the
    comparison.
    """
    # The compiled matcher yields a match object or None; expose a bool.
    return _compile_pattern(pat)(name) is not None
|
1991-01-02 02:11:14 +08:00
|
|
|
|
2010-07-24 00:22:25 +08:00
|
|
|
|
1992-01-13 07:29:29 +08:00
|
|
|
def translate(pat):
    """Convert the shell PATTERN into a regular expression string.

    Meta-characters cannot be quoted.  The result is returned as text;
    it is not compiled.
    """
    # A fresh, unique sentinel marks the positions of `*` wildcards in the
    # intermediate list, so it cannot collide with any translated piece.
    star_marker = object()
    return _join_translated_parts(_translate(pat, star_marker, '.'),
                                  star_marker)
|
|
|
|
|
|
|
|
|
|
|
|
def _translate(pat, STAR, QUESTION_MARK):
    """Translate the shell pattern PAT into a list of regex pieces.

    The pattern is scanned left to right and each construct appends one
    element to the returned list:

    * a run of one or more `*` appends the STAR object once;
    * `?` appends QUESTION_MARK;
    * a terminated bracket expression `[...]` appends a regex character
      class, or `(?!)` / `.` when the set turns out to be empty / negated
      empty;
    * a `[` with no closing `]` appends an escaped literal bracket;
    * any other character appends re.escape() of that character.

    The pieces are returned unjoined, with STAR elements left in place.
    """
    res = []
    add = res.append
    i, n = 0, len(pat)
    while i < n:
        c = pat[i]
        i = i+1
        if c == '*':
            # compress consecutive `*` into one
            if (not res) or res[-1] is not STAR:
                add(STAR)
        elif c == '?':
            add(QUESTION_MARK)
        elif c == '[':
            # i now points just past the '['; use j to look for the
            # closing ']'.
            j = i
            # Step over a leading negation marker.
            if j < n and pat[j] == '!':
                j = j+1
            # A ']' in first position is stepped over, so it becomes part
            # of the set rather than terminating it.
            if j < n and pat[j] == ']':
                j = j+1
            while j < n and pat[j] != ']':
                j = j+1
            if j >= n:
                # No closing bracket: emit '[' as a literal.  i is left
                # unchanged, so the following characters are processed
                # normally by the outer loop.
                add('\\[')
            else:
                # pat[i:j] is the text between the brackets (including a
                # leading '!' if present).
                stuff = pat[i:j]
                if '-' not in stuff:
                    stuff = stuff.replace('\\', r'\\')
                else:
                    # Split the set at hyphens that can form ranges.
                    # Each chunk ends with a range start and (except the
                    # first) begins with a range end.
                    chunks = []
                    # Begin the hyphen search after the first set
                    # character (and after '!' when negated), so a hyphen
                    # in first position is never used as a split point.
                    k = i+2 if pat[i] == '!' else i+1
                    while True:
                        k = pat.find('-', k, j)
                        if k < 0:
                            break
                        chunks.append(pat[i:k])
                        # i is reused as the start of the next chunk; it
                        # is reset to j+1 once the set has been handled.
                        i = k+1
                        # Resume two characters past this hyphen: pat[k+1]
                        # ends this range and pat[k+2] could at most begin
                        # the next one.
                        k = k+3
                    chunk = pat[i:j]
                    if chunk:
                        chunks.append(chunk)
                    else:
                        # The last hyphen found was the final character of
                        # the set; put it back on the previous chunk.
                        chunks[-1] += '-'
                    # Remove empty ranges -- invalid in RE.
                    # chunks[k-1][-1] is a range start and chunks[k][0] its
                    # end; when start > end both endpoints are dropped and
                    # the two chunks are merged.
                    for k in range(len(chunks)-1, 0, -1):
                        if chunks[k-1][-1] > chunks[k][0]:
                            chunks[k-1] = chunks[k-1][:-1] + chunks[k][1:]
                            del chunks[k]
                    # Escape backslashes and hyphens for set difference (--).
                    # Hyphens that create ranges shouldn't be escaped.
                    stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
                                     for s in chunks)
                # Escape set operations (&&, ~~ and ||).
                stuff = re.sub(r'([&~|])', r'\\\1', stuff)
                # Continue scanning after the closing ']'.
                i = j+1
                if not stuff:
                    # Empty range: never match.
                    add('(?!)')
                elif stuff == '!':
                    # Negated empty range: match any character.
                    add('.')
                else:
                    if stuff[0] == '!':
                        # Shell negation becomes regex negation.
                        stuff = '^' + stuff[1:]
                    elif stuff[0] in ('^', '['):
                        # Backslash-escape a leading '^' or '[' so it is
                        # taken literally inside the regex class.
                        stuff = '\\' + stuff
                    add(f'[{stuff}]')
        else:
            add(re.escape(c))
    assert i == n
    return res
|
|
|
|
|
2020-05-06 10:28:24 +08:00
|
|
|
|
2023-11-14 01:15:56 +08:00
|
|
|
def _join_translated_parts(inp, STAR):
|
2020-05-06 10:28:24 +08:00
|
|
|
# Deal with STARs.
|
|
|
|
res = []
|
|
|
|
add = res.append
|
|
|
|
i, n = 0, len(inp)
|
|
|
|
# Fixed pieces at the start?
|
|
|
|
while i < n and inp[i] is not STAR:
|
|
|
|
add(inp[i])
|
|
|
|
i += 1
|
|
|
|
# Now deal with STAR fixed STAR fixed ...
|
|
|
|
# For an interior `STAR fixed` pairing, we want to do a minimal
|
|
|
|
# .*? match followed by `fixed`, with no possibility of backtracking.
|
2022-03-22 01:49:43 +08:00
|
|
|
# Atomic groups ("(?>...)") allow us to spell that directly.
|
|
|
|
# Note: people rely on the undocumented ability to join multiple
|
|
|
|
# translate() results together via "|" to build large regexps matching
|
|
|
|
# "one of many" shell patterns.
|
2020-05-06 10:28:24 +08:00
|
|
|
while i < n:
|
|
|
|
assert inp[i] is STAR
|
|
|
|
i += 1
|
|
|
|
if i == n:
|
|
|
|
add(".*")
|
|
|
|
break
|
|
|
|
assert inp[i] is not STAR
|
|
|
|
fixed = []
|
|
|
|
while i < n and inp[i] is not STAR:
|
|
|
|
fixed.append(inp[i])
|
|
|
|
i += 1
|
|
|
|
fixed = "".join(fixed)
|
|
|
|
if i == n:
|
|
|
|
add(".*")
|
|
|
|
add(fixed)
|
|
|
|
else:
|
2022-03-22 01:49:43 +08:00
|
|
|
add(f"(?>.*?{fixed})")
|
2020-05-06 10:28:24 +08:00
|
|
|
assert i == n
|
|
|
|
res = "".join(res)
|
|
|
|
return fr'(?s:{res})\Z'
|