cpython/Lib/fnmatch.py

"""Filename matching with shell patterns.

fnmatch(FILENAME, PATTERN) matches according to the local convention.
fnmatchcase(FILENAME, PATTERN) always takes case into account.

The functions operate by translating the pattern into a regular
expression.  They cache the compiled regular expressions for speed.

The function translate(PATTERN) returns a regular expression
corresponding to PATTERN.  (It does not compile it.)
"""
import os
import posixpath
import re
import functools

__all__ = ["filter", "fnmatch", "fnmatchcase", "translate"]

def fnmatch(name, pat):
    """Report whether FILENAME matches the shell-style PATTERN.

    Wildcards understood in PATTERN (Unix shell style):

    *       matches everything
    ?       matches any single character
    [seq]   matches any character in seq
    [!seq]  matches any char not in seq

    A leading period in FILENAME gets no special treatment.
    Both arguments are passed through os.path.normcase() before the
    comparison, so they are case-normalized when the operating system
    requires it.  Call fnmatchcase(FILENAME, PATTERN) to skip that step.
    """
    normalize = os.path.normcase
    return fnmatchcase(normalize(name), normalize(pat))

@functools.lru_cache(maxsize=32768, typed=True)
def _compile_pattern(pat):
    """Return the bound ``match`` method of the regex compiled from PAT.

    PAT may be str or bytes.  Results are memoized by the lru_cache
    decorator, so a repeated pattern is translated and compiled only once.
    """
    if not isinstance(pat, bytes):
        return re.compile(translate(pat)).match
    # translate() works on str: decode the bytes pattern as ISO-8859-1,
    # translate it, then encode the regex source back the same way.
    regex_text = translate(str(pat, 'ISO-8859-1'))
    return re.compile(bytes(regex_text, 'ISO-8859-1')).match

def filter(names, pat):
    """Return a list of the items of the iterable NAMES that match PAT."""
    matcher = _compile_pattern(os.path.normcase(pat))
    if os.path is posixpath:
        # normcase on posix is a NOP, so skip the per-name call entirely.
        return [name for name in names if matcher(name)]
    # Elsewhere each name is normalized for matching, but the caller
    # gets back the name exactly as it was supplied.
    return [name for name in names if matcher(os.path.normcase(name))]

def fnmatchcase(name, pat):
    """Report whether FILENAME matches PATTERN, taking case into account.

    Unlike fnmatch(), neither argument is case-normalized first.
    """
    return _compile_pattern(pat)(name) is not None


def translate(pat):
    """Convert a shell PATTERN into regular expression source text.

    Meta-characters cannot be quoted.
    """
    # A fresh sentinel marks where `*` wildcards occurred, so it cannot
    # be confused with any string piece produced by _translate().
    star_marker = object()
    return _join_translated_parts(_translate(pat, star_marker, '.'),
                                  star_marker)


def _translate(pat, STAR, QUESTION_MARK):
    res = []
    add = res.append
    i, n = 0, len(pat)
    while i < n:
        c = pat[i]
        i = i+1
        if c == '*':
            # compress consecutive `*` into one
            if (not res) or res[-1] is not STAR:
                add(STAR)
        elif c == '?':
            add(QUESTION_MARK)
        elif c == '[':
            j = i
            if j < n and pat[j] == '!':
                j = j+1
            if j < n and pat[j] == ']':
                j = j+1
            while j < n and pat[j] != ']':
                j = j+1
            if j >= n:
                add('\\[')
            else:
                stuff = pat[i:j]
                if '-' not in stuff:
                    stuff = stuff.replace('\\', r'\\')
                else:
                    chunks = []
                    k = i+2 if pat[i] == '!' else i+1
                    while True:
                        k = pat.find('-', k, j)
                        if k < 0:
                            break
                        chunks.append(pat[i:k])
                        i = k+1
                        k = k+3
                    chunk = pat[i:j]
                    if chunk:
                        chunks.append(chunk)
                    else:
                        chunks[-1] += '-'
                    # Remove empty ranges -- invalid in RE.
                    for k in range(len(chunks)-1, 0, -1):
                        if chunks[k-1][-1] > chunks[k][0]:
                            chunks[k-1] = chunks[k-1][:-1] + chunks[k][1:]
                            del chunks[k]
                    # Escape backslashes and hyphens for set difference (--).
                    # Hyphens that create ranges shouldn't be escaped.
                    stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
                                     for s in chunks)
                # Escape set operations (&&, ~~ and ||).
                stuff = re.sub(r'([&~|])', r'\\\1', stuff)
                i = j+1
                if not stuff:
                    # Empty range: never match.
                    add('(?!)')
                elif stuff == '!':
                    # Negated empty range: match any character.
                    add('.')
                else:
                    if stuff[0] == '!':
                        stuff = '^' + stuff[1:]
                    elif stuff[0] in ('^', '['):
                        stuff = '\\' + stuff
                    add(f'[{stuff}]')
        else:
            add(re.escape(c))
    assert i == n
    return res


def _join_translated_parts(inp, STAR):
    # Deal with STARs.
    res = []
    add = res.append
    i, n = 0, len(inp)
    # Fixed pieces at the start?
    while i < n and inp[i] is not STAR:
        add(inp[i])
        i += 1
    # Now deal with STAR fixed STAR fixed ...
    # For an interior `STAR fixed` pairing, we want to do a minimal
    # .*? match followed by `fixed`, with no possibility of backtracking.
    # Atomic groups ("(?>...)") allow us to spell that directly.
    # Note: people rely on the undocumented ability to join multiple
    # translate() results together via "|" to build large regexps matching
    # "one of many" shell patterns.
    while i < n:
        assert inp[i] is STAR
        i += 1
        if i == n:
            add(".*")
            break
        assert inp[i] is not STAR
        fixed = []
        while i < n and inp[i] is not STAR:
            fixed.append(inp[i])
            i += 1
        fixed = "".join(fixed)
        if i == n:
            add(".*")
            add(fixed)
        else:
            add(f"(?>.*?{fixed})")
    assert i == n
    res = "".join(res)
    return fr'(?s:{res})\Z'
changes for the Mac 1995-01-27 03:41:45 +01:00			`"""Filename matching with shell patterns.`
Rewritten using regex. 1992-01-13 00:29:29 +01:00
changes for the Mac 1995-01-27 03:41:45 +01:00			`fnmatch(FILENAME, PATTERN) matches according to the local convention.`
			`fnmatchcase(FILENAME, PATTERN) always takes case in account.`
Rewritten using regex. 1992-01-13 00:29:29 +01:00
changes for the Mac 1995-01-27 03:41:45 +01:00			`The functions operate by translating the pattern into a regular`
			`expression. They cache the compiled regular expressions for speed.`

			`The function translate(PATTERN) returns a regular expression`
			`corresponding to PATTERN. (It does not compile it.)`
			`"""`
Make fnmatch be more PEP 8 compliant. Partially closes issue 9356. Thanks to Brian Brazil for the patch. 2010-07-23 18:22:25 +02:00			`import os`
			`import posixpath`
Convert all remaining simple cases of regex usage to re usage. 1997-10-22 23:00:49 +02:00			`import re`
Re-apply r83871. 2010-08-13 18:26:40 +02:00			`import functools`
Convert all remaining simple cases of regex usage to re usage. 1997-10-22 23:00:49 +02:00
Re-apply r83871. 2010-08-13 18:26:40 +02:00			`__all__ = ["filter", "fnmatch", "fnmatchcase", "translate"]`
Make fnmatch be more PEP 8 compliant. Partially closes issue 9356. Thanks to Brian Brazil for the patch. 2010-07-23 18:22:25 +02:00
Initial revision 1991-01-01 19:11:14 +01:00			`def fnmatch(name, pat):`
Whitespace normalization. 2001-01-15 00:36:06 +01:00			`"""Test whether FILENAME matches PATTERN.`

			`Patterns are Unix shell style:`

			`* matches everything`
			`? matches any single character`
			`[seq] matches any character in seq`
			`[!seq] matches any char not in seq`

			`An initial period in FILENAME is not special.`
			`Both FILENAME and PATTERN are first case-normalized`
			`if the operating system requires it.`
			`If you don't want this, use fnmatchcase(FILENAME, PATTERN).`
			`"""`
			`name = os.path.normcase(name)`
			`pat = os.path.normcase(pat)`
			`return fnmatchcase(name, pat)`
changes for the Mac 1995-01-27 03:41:45 +01:00
bpo-42799: fnmatch module: bump up size of lru_cache for patterns (GH-27084) 2021-07-15 12:53:26 +02:00			`@functools.lru_cache(maxsize=32768, typed=True)`
Simplify calls in fnmatch. 2011-10-20 18:22:10 +02:00			`def _compile_pattern(pat):`
			`if isinstance(pat, bytes):`
Re-apply r83871. 2010-08-13 18:26:40 +02:00			`pat_str = str(pat, 'ISO-8859-1')`
			`res_str = translate(pat_str)`
			`res = bytes(res_str, 'ISO-8859-1')`
			`else:`
			`res = translate(pat)`
			`return re.compile(res).match`
Make fnmatch be more PEP 8 compliant. Partially closes issue 9356. Thanks to Brian Brazil for the patch. 2010-07-23 18:22:25 +02:00
Patch #409973: Speedup glob.glob, add fnmatch.filter. 2001-06-06 08:24:38 +02:00			`def filter(names, pat):`
bpo-36769: Document that fnmatch.filter supports any kind of iterable (#13039) 2020-12-18 20:10:20 +01:00			`"""Construct a list from those elements of the iterable NAMES that match PAT."""`
Issue #3187: Better support for "undecodable" filenames. Code by Victor Stinner, with small tweaks by GvR. 2008-10-02 20:55:37 +02:00			`result = []`
			`pat = os.path.normcase(pat)`
Simplify calls in fnmatch. 2011-10-20 18:22:10 +02:00			`match = _compile_pattern(pat)`
Patch #409973: Speedup glob.glob, add fnmatch.filter. 2001-06-06 08:24:38 +02:00			`if os.path is posixpath:`
			`# normcase on posix is NOP. Optimize it away from the loop.`
			`for name in names:`
			`if match(name):`
			`result.append(name)`
			`else:`
			`for name in names:`
			`if match(os.path.normcase(name)):`
			`result.append(name)`
			`return result`

changes for the Mac 1995-01-27 03:41:45 +01:00			`def fnmatchcase(name, pat):`
Whitespace normalization. 2001-01-15 00:36:06 +01:00			`"""Test whether FILENAME matches PATTERN, including case.`

			`This is a version of fnmatch() which doesn't case-normalize`
			`its arguments.`
			`"""`
Simplify calls in fnmatch. 2011-10-20 18:22:10 +02:00			`match = _compile_pattern(pat)`
Issue #3187: Better support for "undecodable" filenames. Code by Victor Stinner, with small tweaks by GvR. 2008-10-02 20:55:37 +02:00			`return match(name) is not None`
Initial revision 1991-01-01 19:11:14 +01:00
Make fnmatch be more PEP 8 compliant. Partially closes issue 9356. Thanks to Brian Brazil for the patch. 2010-07-23 18:22:25 +02:00
Rewritten using regex. 1992-01-13 00:29:29 +01:00			`def translate(pat):`
Whitespace normalization. 2001-01-15 00:36:06 +01:00			`"""Translate a shell PATTERN to a regular expression.`

			`There is no way to quote meta-characters.`
			`"""`

bpo-40480 "fnmatch" exponential execution time (GH-19908) bpo-40480: create different regexps in the presence of multiple `*` patterns to prevent fnmatch() from taking exponential time. 2020-05-06 04:28:24 +02:00			`STAR = object()`
GH-72904: Add `glob.translate()` function (#106703) Add `glob.translate()` function that converts a pathname with shell wildcards to a regular expression. The regular expression is used by pathlib to implement `match()` and `glob()`. This function differs from `fnmatch.translate()` in that wildcards do not match path separators by default, and that a `*` pattern segment matches precisely one path segment. When *recursive* is set to true, `**` pattern segments match any number of path segments, and `**` cannot appear outside its own segment. In pathlib, this change speeds up directory walking (because `_make_child_relpath()` does less work), makes path objects smaller (they don't need a `_lines` slot), and removes the need for some gnarly code. Co-authored-by: Jason R. Coombs <jaraco@jaraco.com> Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> 2023-11-13 18:15:56 +01:00			`parts = _translate(pat, STAR, '.')`
			`return _join_translated_parts(parts, STAR)`


			`def _translate(pat, STAR, QUESTION_MARK):`
bpo-40480 "fnmatch" exponential execution time (GH-19908) bpo-40480: create different regexps in the presence of multiple `*` patterns to prevent fnmatch() from taking exponential time. 2020-05-06 04:28:24 +02:00			`res = []`
			`add = res.append`
Whitespace normalization. 2001-01-15 00:36:06 +01:00			`i, n = 0, len(pat)`
			`while i < n:`
			`c = pat[i]`
			`i = i+1`
			`if c == '*':`
bpo-40480 "fnmatch" exponential execution time (GH-19908) bpo-40480: create different regexps in the presence of multiple `*` patterns to prevent fnmatch() from taking exponential time. 2020-05-06 04:28:24 +02:00			# compress consecutive `*` into one
			`if (not res) or res[-1] is not STAR:`
			`add(STAR)`
Whitespace normalization. 2001-01-15 00:36:06 +01:00			`elif c == '?':`
GH-72904: Add `glob.translate()` function (#106703) Add `glob.translate()` function that converts a pathname with shell wildcards to a regular expression. The regular expression is used by pathlib to implement `match()` and `glob()`. This function differs from `fnmatch.translate()` in that wildcards do not match path separators by default, and that a `*` pattern segment matches precisely one path segment. When *recursive* is set to true, `**` pattern segments match any number of path segments, and `**` cannot appear outside its own segment. In pathlib, this change speeds up directory walking (because `_make_child_relpath()` does less work), makes path objects smaller (they don't need a `_lines` slot), and removes the need for some gnarly code. Co-authored-by: Jason R. Coombs <jaraco@jaraco.com> Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> 2023-11-13 18:15:56 +01:00			`add(QUESTION_MARK)`
Whitespace normalization. 2001-01-15 00:36:06 +01:00			`elif c == '[':`
			`j = i`
			`if j < n and pat[j] == '!':`
			`j = j+1`
			`if j < n and pat[j] == ']':`
			`j = j+1`
			`while j < n and pat[j] != ']':`
			`j = j+1`
			`if j >= n:`
bpo-40480 "fnmatch" exponential execution time (GH-19908) bpo-40480: create different regexps in the presence of multiple `*` patterns to prevent fnmatch() from taking exponential time. 2020-05-06 04:28:24 +02:00			`add('\\[')`
Whitespace normalization. 2001-01-15 00:36:06 +01:00			`else:`
bpo-32775: Fix regular expression warnings in fnmatch. (#5583) fnmatch.translate() no longer produces patterns which contain set operations. Sets starting with '[' or containing '--', '&&', '~~' or '\|\|' will be interpreted differently in regular expressions in future versions. Currently they emit warnings. fnmatch.translate() now avoids producing patterns containing such sets by accident. 2018-02-09 12:30:19 +01:00			`stuff = pat[i:j]`
gh-89973: Fix re.error in the fnmatch module. (GH-93072) Character ranges with upper bound less than lower bound (e.g. [c-a]) are now interpreted as empty ranges, for compatibility with other glob pattern implementations. Previously it was re.error. 2022-06-05 10:46:29 +02:00			`if '-' not in stuff:`
bpo-32775: Fix regular expression warnings in fnmatch. (#5583) fnmatch.translate() no longer produces patterns which contain set operations. Sets starting with '[' or containing '--', '&&', '~~' or '\|\|' will be interpreted differently in regular expressions in future versions. Currently they emit warnings. fnmatch.translate() now avoids producing patterns containing such sets by accident. 2018-02-09 12:30:19 +01:00			`stuff = stuff.replace('\\', r'\\')`
			`else:`
			`chunks = []`
			`k = i+2 if pat[i] == '!' else i+1`
			`while True:`
			`k = pat.find('-', k, j)`
			`if k < 0:`
			`break`
			`chunks.append(pat[i:k])`
			`i = k+1`
			`k = k+3`
gh-89973: Fix re.error in the fnmatch module. (GH-93072) Character ranges with upper bound less than lower bound (e.g. [c-a]) are now interpreted as empty ranges, for compatibility with other glob pattern implementations. Previously it was re.error. 2022-06-05 10:46:29 +02:00			`chunk = pat[i:j]`
			`if chunk:`
			`chunks.append(chunk)`
			`else:`
			`chunks[-1] += '-'`
			`# Remove empty ranges -- invalid in RE.`
			`for k in range(len(chunks)-1, 0, -1):`
			`if chunks[k-1][-1] > chunks[k][0]:`
			`chunks[k-1] = chunks[k-1][:-1] + chunks[k][1:]`
			`del chunks[k]`
bpo-32775: Fix regular expression warnings in fnmatch. (#5583) fnmatch.translate() no longer produces patterns which contain set operations. Sets starting with '[' or containing '--', '&&', '~~' or '\|\|' will be interpreted differently in regular expressions in future versions. Currently they emit warnings. fnmatch.translate() now avoids producing patterns containing such sets by accident. 2018-02-09 12:30:19 +01:00			`# Escape backslashes and hyphens for set difference (--).`
			`# Hyphens that create ranges shouldn't be escaped.`
			`stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')`
			`for s in chunks)`
			`# Escape set operations (&&, ~~ and \|\|).`
			`stuff = re.sub(r'([&~\|])', r'\\\1', stuff)`
Whitespace normalization. 2001-01-15 00:36:06 +01:00			`i = j+1`
gh-89973: Fix re.error in the fnmatch module. (GH-93072) Character ranges with upper bound less than lower bound (e.g. [c-a]) are now interpreted as empty ranges, for compatibility with other glob pattern implementations. Previously it was re.error. 2022-06-05 10:46:29 +02:00			`if not stuff:`
			`# Empty range: never match.`
			`add('(?!)')`
			`elif stuff == '!':`
			`# Negated empty range: match any character.`
			`add('.')`
			`else:`
			`if stuff[0] == '!':`
			`stuff = '^' + stuff[1:]`
			`elif stuff[0] in ('^', '['):`
			`stuff = '\\' + stuff`
			`add(f'[{stuff}]')`
Whitespace normalization. 2001-01-15 00:36:06 +01:00			`else:`
bpo-40480 "fnmatch" exponential execution time (GH-19908) bpo-40480: create different regexps in the presence of multiple `*` patterns to prevent fnmatch() from taking exponential time. 2020-05-06 04:28:24 +02:00			`add(re.escape(c))`
			`assert i == n`
GH-72904: Add `glob.translate()` function (#106703) Add `glob.translate()` function that converts a pathname with shell wildcards to a regular expression. The regular expression is used by pathlib to implement `match()` and `glob()`. This function differs from `fnmatch.translate()` in that wildcards do not match path separators by default, and that a `*` pattern segment matches precisely one path segment. When *recursive* is set to true, `**` pattern segments match any number of path segments, and `**` cannot appear outside its own segment. In pathlib, this change speeds up directory walking (because `_make_child_relpath()` does less work), makes path objects smaller (they don't need a `_lines` slot), and removes the need for some gnarly code. Co-authored-by: Jason R. Coombs <jaraco@jaraco.com> Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> 2023-11-13 18:15:56 +01:00			`return res`

bpo-40480 "fnmatch" exponential execution time (GH-19908) bpo-40480: create different regexps in the presence of multiple `*` patterns to prevent fnmatch() from taking exponential time. 2020-05-06 04:28:24 +02:00
GH-72904: Add `glob.translate()` function (#106703) Add `glob.translate()` function that converts a pathname with shell wildcards to a regular expression. The regular expression is used by pathlib to implement `match()` and `glob()`. This function differs from `fnmatch.translate()` in that wildcards do not match path separators by default, and that a `*` pattern segment matches precisely one path segment. When *recursive* is set to true, `**` pattern segments match any number of path segments, and `**` cannot appear outside its own segment. In pathlib, this change speeds up directory walking (because `_make_child_relpath()` does less work), makes path objects smaller (they don't need a `_lines` slot), and removes the need for some gnarly code. Co-authored-by: Jason R. Coombs <jaraco@jaraco.com> Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> 2023-11-13 18:15:56 +01:00			`def _join_translated_parts(inp, STAR):`
bpo-40480 "fnmatch" exponential execution time (GH-19908) bpo-40480: create different regexps in the presence of multiple `*` patterns to prevent fnmatch() from taking exponential time. 2020-05-06 04:28:24 +02:00			`# Deal with STARs.`
			`res = []`
			`add = res.append`
			`i, n = 0, len(inp)`
			`# Fixed pieces at the start?`
			`while i < n and inp[i] is not STAR:`
			`add(inp[i])`
			`i += 1`
			`# Now deal with STAR fixed STAR fixed ...`
			# For an interior `STAR fixed` pairing, we want to do a minimal
			# .*? match followed by `fixed`, with no possibility of backtracking.
bpo-47080: Use atomic groups to simplify fnmatch (GH-32029) Use re's new atomic groups to greatly simplify the construction of worst-case linear-time patterns. 2022-03-21 18:49:43 +01:00			`# Atomic groups ("(?>...)") allow us to spell that directly.`
			`# Note: people rely on the undocumented ability to join multiple`
			`# translate() results together via "\|" to build large regexps matching`
			`# "one of many" shell patterns.`
bpo-40480 "fnmatch" exponential execution time (GH-19908) bpo-40480: create different regexps in the presence of multiple `*` patterns to prevent fnmatch() from taking exponential time. 2020-05-06 04:28:24 +02:00			`while i < n:`
			`assert inp[i] is STAR`
			`i += 1`
			`if i == n:`
			`add(".*")`
			`break`
			`assert inp[i] is not STAR`
			`fixed = []`
			`while i < n and inp[i] is not STAR:`
			`fixed.append(inp[i])`
			`i += 1`
			`fixed = "".join(fixed)`
			`if i == n:`
			`add(".*")`
			`add(fixed)`
			`else:`
bpo-47080: Use atomic groups to simplify fnmatch (GH-32029) Use re's new atomic groups to greatly simplify the construction of worst-case linear-time patterns. 2022-03-21 18:49:43 +01:00			`add(f"(?>.*?{fixed})")`
bpo-40480 "fnmatch" exponential execution time (GH-19908) bpo-40480: create different regexps in the presence of multiple `*` patterns to prevent fnmatch() from taking exponential time. 2020-05-06 04:28:24 +02:00			`assert i == n`
			`res = "".join(res)`
			`return fr'(?s:{res})\Z'`