From b1b4c790e7d3b5f4244450aefe3d8f01710c13f7 Mon Sep 17 00:00:00 2001 From: Tim Peters Date: Mon, 11 May 2020 21:19:20 -0500 Subject: [PATCH] bpo-40480: restore ability to join fnmatch.translate() results (GH-20049) In translate(), generate unique group names across calls. The restores the undocumented ability to get a valid regexp by joining multiple translate() results via `|`. --- Lib/fnmatch.py | 17 +++++++++++++---- Lib/test/test_fnmatch.py | 24 +++++++++++++++++++++--- 2 files changed, 34 insertions(+), 7 deletions(-) diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py index d7d915d5131..0eb1802bdb5 100644 --- a/Lib/fnmatch.py +++ b/Lib/fnmatch.py @@ -16,6 +16,12 @@ import functools __all__ = ["filter", "fnmatch", "fnmatchcase", "translate"] +# Build a thread-safe incrementing counter to help create unique regexp group +# names across calls. +from itertools import count +_nextgroupnum = count().__next__ +del count + def fnmatch(name, pat): """Test whether FILENAME matches PATTERN. @@ -148,9 +154,12 @@ def translate(pat): # in a lookahead assertion, save the matched part in a group, then # consume that group via a backreference. If the overall match fails, # the lookahead assertion won't try alternatives. So the translation is: - # (?=(P.*?fixed))(?P=name) - # Group names are created as needed: g1, g2, g3, ... - groupnum = 0 + # (?=(?P.*?fixed))(?P=name) + # Group names are created as needed: g0, g1, g2, ... + # The numbers are obtained from _nextgroupnum() to ensure they're unique + # across calls and across threads. This is because people rely on the + # undocumented ability to join multiple translate() results together via + # "|" to build large regexps matching "one of many" shell patterns. while i < n: assert inp[i] is STAR i += 1 @@ -167,7 +176,7 @@ def translate(pat): add(".*") add(fixed) else: - groupnum += 1 + groupnum = _nextgroupnum() add(f"(?=(?P.*?{fixed}))(?P=g{groupnum})") assert i == n res = "".join(res) diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py index 4c173069503..10668e4f610 100644 --- a/Lib/test/test_fnmatch.py +++ b/Lib/test/test_fnmatch.py @@ -106,6 +106,7 @@ class FnmatchTestCase(unittest.TestCase): class TranslateTestCase(unittest.TestCase): def test_translate(self): + import re self.assertEqual(translate('*'), r'(?s:.*)\Z') self.assertEqual(translate('?'), r'(?s:.)\Z') self.assertEqual(translate('a?b*'), r'(?s:a.b.*)\Z') @@ -122,9 +123,26 @@ class TranslateTestCase(unittest.TestCase): self.assertEqual(translate('*********A'), r'(?s:.*A)\Z') self.assertEqual(translate('A*********?[?]?'), r'(?s:A.*.[?].)\Z') # fancy translation to prevent exponential-time match failure - self.assertEqual(translate('**a*a****a'), - r'(?s:(?=(?P.*?a))(?P=g1)(?=(?P.*?a))(?P=g2).*a)\Z') - + t = translate('**a*a****a') + digits = re.findall(r'\d+', t) + self.assertEqual(len(digits), 4) + self.assertEqual(digits[0], digits[1]) + self.assertEqual(digits[2], digits[3]) + g1 = f"g{digits[0]}" # e.g., group name "g4" + g2 = f"g{digits[2]}" # e.g., group name "g5" + self.assertEqual(t, + fr'(?s:(?=(?P<{g1}>.*?a))(?P={g1})(?=(?P<{g2}>.*?a))(?P={g2}).*a)\Z') + # and try pasting multiple translate results - it's an undocumented + # feature that this works; all the pain of generating unique group + # names across calls exists to support this + r1 = translate('**a**a**a*') + r2 = translate('**b**b**b*') + r3 = translate('*c*c*c*') + fatre = "|".join([r1, r2, r3]) + self.assertTrue(re.match(fatre, 'abaccad')) + self.assertTrue(re.match(fatre, 'abxbcab')) + self.assertTrue(re.match(fatre, 'cbabcaxc')) + self.assertFalse(re.match(fatre, 'dabccbad')) class FilterTestCase(unittest.TestCase):