diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py index b98e6413295..d7d915d5131 100644 --- a/Lib/fnmatch.py +++ b/Lib/fnmatch.py @@ -77,15 +77,19 @@ def translate(pat): There is no way to quote meta-characters. """ + STAR = object() + res = [] + add = res.append i, n = 0, len(pat) - res = '' while i < n: c = pat[i] i = i+1 if c == '*': - res = res + '.*' + # compress consecutive `*` into one + if (not res) or res[-1] is not STAR: + add(STAR) elif c == '?': - res = res + '.' + add('.') elif c == '[': j = i if j < n and pat[j] == '!': @@ -95,7 +99,7 @@ def translate(pat): while j < n and pat[j] != ']': j = j+1 if j >= n: - res = res + '\\[' + add('\\[') else: stuff = pat[i:j] if '--' not in stuff: @@ -122,7 +126,49 @@ def translate(pat): stuff = '^' + stuff[1:] elif stuff[0] in ('^', '['): stuff = '\\' + stuff - res = '%s[%s]' % (res, stuff) + add(f'[{stuff}]') else: - res = res + re.escape(c) - return r'(?s:%s)\Z' % res + add(re.escape(c)) + assert i == n + + # Deal with STARs. + inp = res + res = [] + add = res.append + i, n = 0, len(inp) + # Fixed pieces at the start? + while i < n and inp[i] is not STAR: + add(inp[i]) + i += 1 + # Now deal with STAR fixed STAR fixed ... + # For an interior `STAR fixed` pairing, we want to do a minimal + # .*? match followed by `fixed`, with no possibility of backtracking. + # We can't spell that directly, but can trick it into working by matching + # .*?fixed + # in a lookahead assertion, save the matched part in a group, then + # consume that group via a backreference. If the overall match fails, + # the lookahead assertion won't try alternatives. So the translation is: + # (?=(P.*?fixed))(?P=name) + # Group names are created as needed: g1, g2, g3, ... + groupnum = 0 + while i < n: + assert inp[i] is STAR + i += 1 + if i == n: + add(".*") + break + assert inp[i] is not STAR + fixed = [] + while i < n and inp[i] is not STAR: + fixed.append(inp[i]) + i += 1 + fixed = "".join(fixed) + if i == n: + add(".*") + add(fixed) + else: + groupnum += 1 + add(f"(?=(?P.*?{fixed}))(?P=g{groupnum})") + assert i == n + res = "".join(res) + return fr'(?s:{res})\Z' diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py index 55f9f0d3a54..4c173069503 100644 --- a/Lib/test/test_fnmatch.py +++ b/Lib/test/test_fnmatch.py @@ -45,6 +45,13 @@ class FnmatchTestCase(unittest.TestCase): check('\nfoo', 'foo*', False) check('\n', '*') + def test_slow_fnmatch(self): + check = self.check_match + check('a' * 50, '*a*a*a*a*a*a*a*a*a*a') + # The next "takes forever" if the regexp translation is + # straightforward. See bpo-40480. + check('a' * 50 + 'b', '*a*a*a*a*a*a*a*a*a*a', False) + def test_mix_bytes_str(self): self.assertRaises(TypeError, fnmatch, 'test', b'*') self.assertRaises(TypeError, fnmatch, b'test', '*') @@ -107,6 +114,16 @@ class TranslateTestCase(unittest.TestCase): self.assertEqual(translate('[!x]'), r'(?s:[^x])\Z') self.assertEqual(translate('[^x]'), r'(?s:[\^x])\Z') self.assertEqual(translate('[x'), r'(?s:\[x)\Z') + # from the docs + self.assertEqual(translate('*.txt'), r'(?s:.*\.txt)\Z') + # squash consecutive stars + self.assertEqual(translate('*********'), r'(?s:.*)\Z') + self.assertEqual(translate('A*********'), r'(?s:A.*)\Z') + self.assertEqual(translate('*********A'), r'(?s:.*A)\Z') + self.assertEqual(translate('A*********?[?]?'), r'(?s:A.*.[?].)\Z') + # fancy translation to prevent exponential-time match failure + self.assertEqual(translate('**a*a****a'), + r'(?s:(?=(?P.*?a))(?P=g1)(?=(?P.*?a))(?P=g2).*a)\Z') class FilterTestCase(unittest.TestCase): diff --git a/Misc/NEWS.d/next/Library/2020-05-04-21-21-43.bpo-40480.mjldWa.rst b/Misc/NEWS.d/next/Library/2020-05-04-21-21-43.bpo-40480.mjldWa.rst new file mode 100644 index 00000000000..d046b142241 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-05-04-21-21-43.bpo-40480.mjldWa.rst @@ -0,0 +1 @@ +``fnmatch.fnmatch()`` could take exponential time in the presence of multiple ``*`` pattern characters. This was repaired by generating more elaborate regular expressions to avoid futile backtracking. \ No newline at end of file