bpo-40480: restore ability to join fnmatch.translate() results (GH-20049)
In translate(), generate unique group names across calls. This restores the undocumented ability to get a valid regexp by joining multiple translate() results via `|`.
This commit is contained in:
parent
d0919f0d6b
commit
b1b4c790e7
|
@ -16,6 +16,12 @@ import functools
|
||||||
|
|
||||||
__all__ = ["filter", "fnmatch", "fnmatchcase", "translate"]
|
__all__ = ["filter", "fnmatch", "fnmatchcase", "translate"]
|
||||||
|
|
||||||
|
# Build a thread-safe incrementing counter to help create unique regexp group
|
||||||
|
# names across calls.
|
||||||
|
from itertools import count
|
||||||
|
_nextgroupnum = count().__next__
|
||||||
|
del count
|
||||||
|
|
||||||
def fnmatch(name, pat):
|
def fnmatch(name, pat):
|
||||||
"""Test whether FILENAME matches PATTERN.
|
"""Test whether FILENAME matches PATTERN.
|
||||||
|
|
||||||
|
@ -148,9 +154,12 @@ def translate(pat):
|
||||||
# in a lookahead assertion, save the matched part in a group, then
|
# in a lookahead assertion, save the matched part in a group, then
|
||||||
# consume that group via a backreference. If the overall match fails,
|
# consume that group via a backreference. If the overall match fails,
|
||||||
# the lookahead assertion won't try alternatives. So the translation is:
|
# the lookahead assertion won't try alternatives. So the translation is:
|
||||||
# (?=(P<name>.*?fixed))(?P=name)
|
# (?=(?P<name>.*?fixed))(?P=name)
|
||||||
# Group names are created as needed: g1, g2, g3, ...
|
# Group names are created as needed: g0, g1, g2, ...
|
||||||
groupnum = 0
|
# The numbers are obtained from _nextgroupnum() to ensure they're unique
|
||||||
|
# across calls and across threads. This is because people rely on the
|
||||||
|
# undocumented ability to join multiple translate() results together via
|
||||||
|
# "|" to build large regexps matching "one of many" shell patterns.
|
||||||
while i < n:
|
while i < n:
|
||||||
assert inp[i] is STAR
|
assert inp[i] is STAR
|
||||||
i += 1
|
i += 1
|
||||||
|
@ -167,7 +176,7 @@ def translate(pat):
|
||||||
add(".*")
|
add(".*")
|
||||||
add(fixed)
|
add(fixed)
|
||||||
else:
|
else:
|
||||||
groupnum += 1
|
groupnum = _nextgroupnum()
|
||||||
add(f"(?=(?P<g{groupnum}>.*?{fixed}))(?P=g{groupnum})")
|
add(f"(?=(?P<g{groupnum}>.*?{fixed}))(?P=g{groupnum})")
|
||||||
assert i == n
|
assert i == n
|
||||||
res = "".join(res)
|
res = "".join(res)
|
||||||
|
|
|
@ -106,6 +106,7 @@ class FnmatchTestCase(unittest.TestCase):
|
||||||
class TranslateTestCase(unittest.TestCase):
|
class TranslateTestCase(unittest.TestCase):
|
||||||
|
|
||||||
def test_translate(self):
|
def test_translate(self):
|
||||||
|
import re
|
||||||
self.assertEqual(translate('*'), r'(?s:.*)\Z')
|
self.assertEqual(translate('*'), r'(?s:.*)\Z')
|
||||||
self.assertEqual(translate('?'), r'(?s:.)\Z')
|
self.assertEqual(translate('?'), r'(?s:.)\Z')
|
||||||
self.assertEqual(translate('a?b*'), r'(?s:a.b.*)\Z')
|
self.assertEqual(translate('a?b*'), r'(?s:a.b.*)\Z')
|
||||||
|
@ -122,9 +123,26 @@ class TranslateTestCase(unittest.TestCase):
|
||||||
self.assertEqual(translate('*********A'), r'(?s:.*A)\Z')
|
self.assertEqual(translate('*********A'), r'(?s:.*A)\Z')
|
||||||
self.assertEqual(translate('A*********?[?]?'), r'(?s:A.*.[?].)\Z')
|
self.assertEqual(translate('A*********?[?]?'), r'(?s:A.*.[?].)\Z')
|
||||||
# fancy translation to prevent exponential-time match failure
|
# fancy translation to prevent exponential-time match failure
|
||||||
self.assertEqual(translate('**a*a****a'),
|
t = translate('**a*a****a')
|
||||||
r'(?s:(?=(?P<g1>.*?a))(?P=g1)(?=(?P<g2>.*?a))(?P=g2).*a)\Z')
|
digits = re.findall(r'\d+', t)
|
||||||
|
self.assertEqual(len(digits), 4)
|
||||||
|
self.assertEqual(digits[0], digits[1])
|
||||||
|
self.assertEqual(digits[2], digits[3])
|
||||||
|
g1 = f"g{digits[0]}" # e.g., group name "g4"
|
||||||
|
g2 = f"g{digits[2]}" # e.g., group name "g5"
|
||||||
|
self.assertEqual(t,
|
||||||
|
fr'(?s:(?=(?P<{g1}>.*?a))(?P={g1})(?=(?P<{g2}>.*?a))(?P={g2}).*a)\Z')
|
||||||
|
# and try pasting multiple translate results - it's an undocumented
|
||||||
|
# feature that this works; all the pain of generating unique group
|
||||||
|
# names across calls exists to support this
|
||||||
|
r1 = translate('**a**a**a*')
|
||||||
|
r2 = translate('**b**b**b*')
|
||||||
|
r3 = translate('*c*c*c*')
|
||||||
|
fatre = "|".join([r1, r2, r3])
|
||||||
|
self.assertTrue(re.match(fatre, 'abaccad'))
|
||||||
|
self.assertTrue(re.match(fatre, 'abxbcab'))
|
||||||
|
self.assertTrue(re.match(fatre, 'cbabcaxc'))
|
||||||
|
self.assertFalse(re.match(fatre, 'dabccbad'))
|
||||||
|
|
||||||
class FilterTestCase(unittest.TestCase):
|
class FilterTestCase(unittest.TestCase):
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue