bpo-40480 "fnmatch" exponential execution time (GH-19908)
bpo-40480: create different regexps in the presence of multiple `*` patterns to prevent fnmatch() from taking exponential time.
This commit is contained in:
parent
96074de573
commit
b9c46a2c2d
|
@ -77,15 +77,19 @@ def translate(pat):
|
|||
There is no way to quote meta-characters.
|
||||
"""
|
||||
|
||||
STAR = object()
|
||||
res = []
|
||||
add = res.append
|
||||
i, n = 0, len(pat)
|
||||
res = ''
|
||||
while i < n:
|
||||
c = pat[i]
|
||||
i = i+1
|
||||
if c == '*':
|
||||
res = res + '.*'
|
||||
# compress consecutive `*` into one
|
||||
if (not res) or res[-1] is not STAR:
|
||||
add(STAR)
|
||||
elif c == '?':
|
||||
res = res + '.'
|
||||
add('.')
|
||||
elif c == '[':
|
||||
j = i
|
||||
if j < n and pat[j] == '!':
|
||||
|
@ -95,7 +99,7 @@ def translate(pat):
|
|||
while j < n and pat[j] != ']':
|
||||
j = j+1
|
||||
if j >= n:
|
||||
res = res + '\\['
|
||||
add('\\[')
|
||||
else:
|
||||
stuff = pat[i:j]
|
||||
if '--' not in stuff:
|
||||
|
@ -122,7 +126,49 @@ def translate(pat):
|
|||
stuff = '^' + stuff[1:]
|
||||
elif stuff[0] in ('^', '['):
|
||||
stuff = '\\' + stuff
|
||||
res = '%s[%s]' % (res, stuff)
|
||||
add(f'[{stuff}]')
|
||||
else:
|
||||
res = res + re.escape(c)
|
||||
return r'(?s:%s)\Z' % res
|
||||
add(re.escape(c))
|
||||
assert i == n
|
||||
|
||||
# Deal with STARs.
|
||||
inp = res
|
||||
res = []
|
||||
add = res.append
|
||||
i, n = 0, len(inp)
|
||||
# Fixed pieces at the start?
|
||||
while i < n and inp[i] is not STAR:
|
||||
add(inp[i])
|
||||
i += 1
|
||||
# Now deal with STAR fixed STAR fixed ...
|
||||
# For an interior `STAR fixed` pairing, we want to do a minimal
|
||||
# .*? match followed by `fixed`, with no possibility of backtracking.
|
||||
# We can't spell that directly, but can trick it into working by matching
|
||||
# .*?fixed
|
||||
# in a lookahead assertion, save the matched part in a group, then
|
||||
# consume that group via a backreference. If the overall match fails,
|
||||
# the lookahead assertion won't try alternatives. So the translation is:
|
||||
# (?=(?P<name>.*?fixed))(?P=name)
|
||||
# Group names are created as needed: g1, g2, g3, ...
|
||||
groupnum = 0
|
||||
while i < n:
|
||||
assert inp[i] is STAR
|
||||
i += 1
|
||||
if i == n:
|
||||
add(".*")
|
||||
break
|
||||
assert inp[i] is not STAR
|
||||
fixed = []
|
||||
while i < n and inp[i] is not STAR:
|
||||
fixed.append(inp[i])
|
||||
i += 1
|
||||
fixed = "".join(fixed)
|
||||
if i == n:
|
||||
add(".*")
|
||||
add(fixed)
|
||||
else:
|
||||
groupnum += 1
|
||||
add(f"(?=(?P<g{groupnum}>.*?{fixed}))(?P=g{groupnum})")
|
||||
assert i == n
|
||||
res = "".join(res)
|
||||
return fr'(?s:{res})\Z'
|
||||
|
|
|
@ -45,6 +45,13 @@ class FnmatchTestCase(unittest.TestCase):
|
|||
check('\nfoo', 'foo*', False)
|
||||
check('\n', '*')
|
||||
|
||||
def test_slow_fnmatch(self):
|
||||
check = self.check_match
|
||||
check('a' * 50, '*a*a*a*a*a*a*a*a*a*a')
|
||||
# The next "takes forever" if the regexp translation is
|
||||
# straightforward. See bpo-40480.
|
||||
check('a' * 50 + 'b', '*a*a*a*a*a*a*a*a*a*a', False)
|
||||
|
||||
def test_mix_bytes_str(self):
|
||||
self.assertRaises(TypeError, fnmatch, 'test', b'*')
|
||||
self.assertRaises(TypeError, fnmatch, b'test', '*')
|
||||
|
@ -107,6 +114,16 @@ class TranslateTestCase(unittest.TestCase):
|
|||
self.assertEqual(translate('[!x]'), r'(?s:[^x])\Z')
|
||||
self.assertEqual(translate('[^x]'), r'(?s:[\^x])\Z')
|
||||
self.assertEqual(translate('[x'), r'(?s:\[x)\Z')
|
||||
# from the docs
|
||||
self.assertEqual(translate('*.txt'), r'(?s:.*\.txt)\Z')
|
||||
# squash consecutive stars
|
||||
self.assertEqual(translate('*********'), r'(?s:.*)\Z')
|
||||
self.assertEqual(translate('A*********'), r'(?s:A.*)\Z')
|
||||
self.assertEqual(translate('*********A'), r'(?s:.*A)\Z')
|
||||
self.assertEqual(translate('A*********?[?]?'), r'(?s:A.*.[?].)\Z')
|
||||
# fancy translation to prevent exponential-time match failure
|
||||
self.assertEqual(translate('**a*a****a'),
|
||||
r'(?s:(?=(?P<g1>.*?a))(?P=g1)(?=(?P<g2>.*?a))(?P=g2).*a)\Z')
|
||||
|
||||
|
||||
class FilterTestCase(unittest.TestCase):
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
``fnmatch.fnmatch()`` could take exponential time in the presence of multiple ``*`` pattern characters. This was repaired by generating more elaborate regular expressions to avoid futile backtracking.
|
Loading…
Reference in New Issue