mirror of https://github.com/python/cpython
gh-89973: Fix re.error in the fnmatch module. (GH-93072)
Character ranges with an upper bound less than the lower bound (e.g. [c-a]) are now interpreted as empty ranges, for compatibility with other glob pattern implementations. Previously such patterns raised re.error.
This commit is contained in:
parent
6f8367d348
commit
0902c3d8ed
|
@ -102,7 +102,7 @@ def translate(pat):
|
|||
add('\\[')
|
||||
else:
|
||||
stuff = pat[i:j]
|
||||
if '--' not in stuff:
|
||||
if '-' not in stuff:
|
||||
stuff = stuff.replace('\\', r'\\')
|
||||
else:
|
||||
chunks = []
|
||||
|
@ -114,7 +114,16 @@ def translate(pat):
|
|||
chunks.append(pat[i:k])
|
||||
i = k+1
|
||||
k = k+3
|
||||
chunks.append(pat[i:j])
|
||||
chunk = pat[i:j]
|
||||
if chunk:
|
||||
chunks.append(chunk)
|
||||
else:
|
||||
chunks[-1] += '-'
|
||||
# Remove empty ranges -- invalid in RE.
|
||||
for k in range(len(chunks)-1, 0, -1):
|
||||
if chunks[k-1][-1] > chunks[k][0]:
|
||||
chunks[k-1] = chunks[k-1][:-1] + chunks[k][1:]
|
||||
del chunks[k]
|
||||
# Escape backslashes and hyphens for set difference (--).
|
||||
# Hyphens that create ranges shouldn't be escaped.
|
||||
stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-')
|
||||
|
@ -122,6 +131,13 @@ def translate(pat):
|
|||
# Escape set operations (&&, ~~ and ||).
|
||||
stuff = re.sub(r'([&~|])', r'\\\1', stuff)
|
||||
i = j+1
|
||||
if not stuff:
|
||||
# Empty range: never match.
|
||||
add('(?!)')
|
||||
elif stuff == '!':
|
||||
# Negated empty range: match any character.
|
||||
add('.')
|
||||
else:
|
||||
if stuff[0] == '!':
|
||||
stuff = '^' + stuff[1:]
|
||||
elif stuff[0] in ('^', '['):
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
import unittest
|
||||
import os
|
||||
import string
|
||||
import warnings
|
||||
|
||||
from fnmatch import fnmatch, fnmatchcase, translate, filter
|
||||
|
@ -91,6 +92,119 @@ class FnmatchTestCase(unittest.TestCase):
|
|||
check('usr/bin', 'usr\\bin', normsep)
|
||||
check('usr\\bin', 'usr\\bin')
|
||||
|
||||
def test_char_set(self):
|
||||
ignorecase = os.path.normcase('ABC') == os.path.normcase('abc')
|
||||
check = self.check_match
|
||||
tescases = string.ascii_lowercase + string.digits + string.punctuation
|
||||
for c in tescases:
|
||||
check(c, '[az]', c in 'az')
|
||||
check(c, '[!az]', c not in 'az')
|
||||
# Case insensitive.
|
||||
for c in tescases:
|
||||
check(c, '[AZ]', (c in 'az') and ignorecase)
|
||||
check(c, '[!AZ]', (c not in 'az') or not ignorecase)
|
||||
for c in string.ascii_uppercase:
|
||||
check(c, '[az]', (c in 'AZ') and ignorecase)
|
||||
check(c, '[!az]', (c not in 'AZ') or not ignorecase)
|
||||
# Repeated same character.
|
||||
for c in tescases:
|
||||
check(c, '[aa]', c == 'a')
|
||||
# Special cases.
|
||||
for c in tescases:
|
||||
check(c, '[^az]', c in '^az')
|
||||
check(c, '[[az]', c in '[az')
|
||||
check(c, r'[!]]', c != ']')
|
||||
check('[', '[')
|
||||
check('[]', '[]')
|
||||
check('[!', '[!')
|
||||
check('[!]', '[!]')
|
||||
|
||||
def test_range(self):
|
||||
ignorecase = os.path.normcase('ABC') == os.path.normcase('abc')
|
||||
normsep = os.path.normcase('\\') == os.path.normcase('/')
|
||||
check = self.check_match
|
||||
tescases = string.ascii_lowercase + string.digits + string.punctuation
|
||||
for c in tescases:
|
||||
check(c, '[b-d]', c in 'bcd')
|
||||
check(c, '[!b-d]', c not in 'bcd')
|
||||
check(c, '[b-dx-z]', c in 'bcdxyz')
|
||||
check(c, '[!b-dx-z]', c not in 'bcdxyz')
|
||||
# Case insensitive.
|
||||
for c in tescases:
|
||||
check(c, '[B-D]', (c in 'bcd') and ignorecase)
|
||||
check(c, '[!B-D]', (c not in 'bcd') or not ignorecase)
|
||||
for c in string.ascii_uppercase:
|
||||
check(c, '[b-d]', (c in 'BCD') and ignorecase)
|
||||
check(c, '[!b-d]', (c not in 'BCD') or not ignorecase)
|
||||
# Upper bound == lower bound.
|
||||
for c in tescases:
|
||||
check(c, '[b-b]', c == 'b')
|
||||
# Special cases.
|
||||
for c in tescases:
|
||||
check(c, '[!-#]', c not in '-#')
|
||||
check(c, '[!--.]', c not in '-.')
|
||||
check(c, '[^-`]', c in '^_`')
|
||||
if not (normsep and c == '/'):
|
||||
check(c, '[[-^]', c in r'[\]^')
|
||||
check(c, r'[\-^]', c in r'\]^')
|
||||
check(c, '[b-]', c in '-b')
|
||||
check(c, '[!b-]', c not in '-b')
|
||||
check(c, '[-b]', c in '-b')
|
||||
check(c, '[!-b]', c not in '-b')
|
||||
check(c, '[-]', c in '-')
|
||||
check(c, '[!-]', c not in '-')
|
||||
# Upper bound is less than lower bound: error in RE.
|
||||
for c in tescases:
|
||||
check(c, '[d-b]', False)
|
||||
check(c, '[!d-b]', True)
|
||||
check(c, '[d-bx-z]', c in 'xyz')
|
||||
check(c, '[!d-bx-z]', c not in 'xyz')
|
||||
check(c, '[d-b^-`]', c in '^_`')
|
||||
if not (normsep and c == '/'):
|
||||
check(c, '[d-b[-^]', c in r'[\]^')
|
||||
|
||||
def test_sep_in_char_set(self):
|
||||
normsep = os.path.normcase('\\') == os.path.normcase('/')
|
||||
check = self.check_match
|
||||
check('/', r'[/]')
|
||||
check('\\', r'[\]')
|
||||
check('/', r'[\]', normsep)
|
||||
check('\\', r'[/]', normsep)
|
||||
check('[/]', r'[/]', False)
|
||||
check(r'[\\]', r'[/]', False)
|
||||
check('\\', r'[\t]')
|
||||
check('/', r'[\t]', normsep)
|
||||
check('t', r'[\t]')
|
||||
check('\t', r'[\t]', False)
|
||||
|
||||
def test_sep_in_range(self):
|
||||
normsep = os.path.normcase('\\') == os.path.normcase('/')
|
||||
check = self.check_match
|
||||
check('a/b', 'a[.-0]b', not normsep)
|
||||
check('a\\b', 'a[.-0]b', False)
|
||||
check('a\\b', 'a[Z-^]b', not normsep)
|
||||
check('a/b', 'a[Z-^]b', False)
|
||||
|
||||
check('a/b', 'a[/-0]b', not normsep)
|
||||
check(r'a\b', 'a[/-0]b', False)
|
||||
check('a[/-0]b', 'a[/-0]b', False)
|
||||
check(r'a[\-0]b', 'a[/-0]b', False)
|
||||
|
||||
check('a/b', 'a[.-/]b')
|
||||
check(r'a\b', 'a[.-/]b', normsep)
|
||||
check('a[.-/]b', 'a[.-/]b', False)
|
||||
check(r'a[.-\]b', 'a[.-/]b', False)
|
||||
|
||||
check(r'a\b', r'a[\-^]b')
|
||||
check('a/b', r'a[\-^]b', normsep)
|
||||
check(r'a[\-^]b', r'a[\-^]b', False)
|
||||
check('a[/-^]b', r'a[\-^]b', False)
|
||||
|
||||
check(r'a\b', r'a[Z-\]b', not normsep)
|
||||
check('a/b', r'a[Z-\]b', False)
|
||||
check(r'a[Z-\]b', r'a[Z-\]b', False)
|
||||
check('a[Z-/]b', r'a[Z-\]b', False)
|
||||
|
||||
def test_warnings(self):
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter('error', Warning)
|
||||
|
|
|
@ -0,0 +1,3 @@
|
|||
Fix :exc:`re.error` raised in :mod:`fnmatch` if the pattern contains a
|
||||
character range with upper bound lower than lower bound (e.g. ``[c-a]``).
|
||||
Now such ranges are interpreted as empty ranges.
|
Loading…
Reference in New Issue