From 23cdbfa744f0ec0e9e7575d378df4cb758691cd3 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 9 Feb 2018 13:30:19 +0200 Subject: [PATCH] bpo-32775: Fix regular expression warnings in fnmatch. (#5583) fnmatch.translate() no longer produces patterns which contain set operations. Sets starting with '[' or containing '--', '&&', '~~' or '||' will be interpreted differently in regular expressions in future versions. Currently they emit warnings. fnmatch.translate() now avoids producing patterns containing such sets by accident. --- Lib/fnmatch.py | 23 +++++++++++++++++-- Lib/test/test_fnmatch.py | 12 ++++++++++ .../2018-02-07-19-12-10.bpo-32775.-T77_c.rst | 5 ++++ 3 files changed, 38 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2018-02-07-19-12-10.bpo-32775.-T77_c.rst diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py index fd3b5142e34..b98e6413295 100644 --- a/Lib/fnmatch.py +++ b/Lib/fnmatch.py @@ -97,11 +97,30 @@ def translate(pat): if j >= n: res = res + '\\[' else: - stuff = pat[i:j].replace('\\','\\\\') + stuff = pat[i:j] + if '--' not in stuff: + stuff = stuff.replace('\\', r'\\') + else: + chunks = [] + k = i+2 if pat[i] == '!' else i+1 + while True: + k = pat.find('-', k, j) + if k < 0: + break + chunks.append(pat[i:k]) + i = k+1 + k = k+3 + chunks.append(pat[i:j]) + # Escape backslashes and hyphens for set difference (--). + # Hyphens that create ranges shouldn't be escaped. + stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-') + for s in chunks) + # Escape set operations (&&, ~~ and ||). + stuff = re.sub(r'([&~|])', r'\\\1', stuff) i = j+1 if stuff[0] == '!': stuff = '^' + stuff[1:] - elif stuff[0] == '^': + elif stuff[0] in ('^', '['): stuff = '\\' + stuff res = '%s[%s]' % (res, stuff) else: diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py index 78245c3ab1a..55f9f0d3a54 100644 --- a/Lib/test/test_fnmatch.py +++ b/Lib/test/test_fnmatch.py @@ -2,6 +2,7 @@ import unittest import os +import warnings from fnmatch import fnmatch, fnmatchcase, translate, filter @@ -83,6 +84,17 @@ class FnmatchTestCase(unittest.TestCase): check('usr/bin', 'usr\\bin', normsep) check('usr\\bin', 'usr\\bin') + def test_warnings(self): + with warnings.catch_warnings(): + warnings.simplefilter('error', Warning) + check = self.check_match + check('[', '[[]') + check('&', '[a&&b]') + check('|', '[a||b]') + check('~', '[a~~b]') + check(',', '[a-z+--A-Z]') + check('.', '[a-z--/A-Z]') + class TranslateTestCase(unittest.TestCase): diff --git a/Misc/NEWS.d/next/Library/2018-02-07-19-12-10.bpo-32775.-T77_c.rst b/Misc/NEWS.d/next/Library/2018-02-07-19-12-10.bpo-32775.-T77_c.rst new file mode 100644 index 00000000000..ed563c17fdc --- /dev/null +++ b/Misc/NEWS.d/next/Library/2018-02-07-19-12-10.bpo-32775.-T77_c.rst @@ -0,0 +1,5 @@ +:func:`fnmatch.translate()` no longer produces patterns which contain set +operations. Sets starting with '[' or containing '--', '&&', '~~' or '||' +will be interpreted differently in regular expressions in future versions. +Currently they emit warnings. fnmatch.translate() now avoids producing +patterns containing such sets by accident.