bpo-40480 "fnmatch" exponential execution time (GH-19908)

bpo-40480:  create different regexps in the presence of multiple `*`
patterns to prevent fnmatch() from taking exponential time.
This commit is contained in:
Tim Peters 2020-05-05 21:28:24 -05:00 committed by GitHub
parent 96074de573
commit b9c46a2c2d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 71 additions and 7 deletions

View File

@ -77,15 +77,19 @@ def translate(pat):
There is no way to quote meta-characters.
"""
STAR = object()
res = []
add = res.append
i, n = 0, len(pat)
res = ''
while i < n:
c = pat[i]
i = i+1
if c == '*':
res = res + '.*'
# compress consecutive `*` into one
if (not res) or res[-1] is not STAR:
add(STAR)
elif c == '?':
res = res + '.'
add('.')
elif c == '[':
j = i
if j < n and pat[j] == '!':
@ -95,7 +99,7 @@ def translate(pat):
while j < n and pat[j] != ']':
j = j+1
if j >= n:
res = res + '\\['
add('\\[')
else:
stuff = pat[i:j]
if '--' not in stuff:
@ -122,7 +126,49 @@ def translate(pat):
stuff = '^' + stuff[1:]
elif stuff[0] in ('^', '['):
stuff = '\\' + stuff
res = '%s[%s]' % (res, stuff)
add(f'[{stuff}]')
else:
res = res + re.escape(c)
return r'(?s:%s)\Z' % res
add(re.escape(c))
assert i == n
# Deal with STARs.
inp = res
res = []
add = res.append
i, n = 0, len(inp)
# Fixed pieces at the start?
while i < n and inp[i] is not STAR:
add(inp[i])
i += 1
# Now deal with STAR fixed STAR fixed ...
# For an interior `STAR fixed` pairing, we want to do a minimal
# .*? match followed by `fixed`, with no possibility of backtracking.
# We can't spell that directly, but can trick it into working by matching
# .*?fixed
# in a lookahead assertion, save the matched part in a group, then
# consume that group via a backreference. If the overall match fails,
# the lookahead assertion won't try alternatives. So the translation is:
# (?=(P<name>.*?fixed))(?P=name)
# Group names are created as needed: g1, g2, g3, ...
groupnum = 0
while i < n:
assert inp[i] is STAR
i += 1
if i == n:
add(".*")
break
assert inp[i] is not STAR
fixed = []
while i < n and inp[i] is not STAR:
fixed.append(inp[i])
i += 1
fixed = "".join(fixed)
if i == n:
add(".*")
add(fixed)
else:
groupnum += 1
add(f"(?=(?P<g{groupnum}>.*?{fixed}))(?P=g{groupnum})")
assert i == n
res = "".join(res)
return fr'(?s:{res})\Z'

View File

@ -45,6 +45,13 @@ class FnmatchTestCase(unittest.TestCase):
check('\nfoo', 'foo*', False)
check('\n', '*')
def test_slow_fnmatch(self):
check = self.check_match
check('a' * 50, '*a*a*a*a*a*a*a*a*a*a')
# The next "takes forever" if the regexp translation is
# straightforward. See bpo-40480.
check('a' * 50 + 'b', '*a*a*a*a*a*a*a*a*a*a', False)
def test_mix_bytes_str(self):
self.assertRaises(TypeError, fnmatch, 'test', b'*')
self.assertRaises(TypeError, fnmatch, b'test', '*')
@ -107,6 +114,16 @@ class TranslateTestCase(unittest.TestCase):
self.assertEqual(translate('[!x]'), r'(?s:[^x])\Z')
self.assertEqual(translate('[^x]'), r'(?s:[\^x])\Z')
self.assertEqual(translate('[x'), r'(?s:\[x)\Z')
# from the docs
self.assertEqual(translate('*.txt'), r'(?s:.*\.txt)\Z')
# squash consecutive stars
self.assertEqual(translate('*********'), r'(?s:.*)\Z')
self.assertEqual(translate('A*********'), r'(?s:A.*)\Z')
self.assertEqual(translate('*********A'), r'(?s:.*A)\Z')
self.assertEqual(translate('A*********?[?]?'), r'(?s:A.*.[?].)\Z')
# fancy translation to prevent exponential-time match failure
self.assertEqual(translate('**a*a****a'),
r'(?s:(?=(?P<g1>.*?a))(?P=g1)(?=(?P<g2>.*?a))(?P=g2).*a)\Z')
class FilterTestCase(unittest.TestCase):

View File

@ -0,0 +1 @@
``fnmatch.fnmatch()`` could take exponential time in the presence of multiple ``*`` pattern characters. This was repaired by generating more elaborate regular expressions to avoid futile backtracking.