bpo-29995: re.escape() now escapes only special characters. (#1007)
This commit is contained in:
parent
a6e395dffa
commit
5908300e4b
|
@ -786,7 +786,7 @@ form.
|
||||||
|
|
||||||
.. function:: escape(pattern)
|
.. function:: escape(pattern)
|
||||||
|
|
||||||
Escape all the characters in *pattern* except ASCII letters, numbers and ``'_'``.
|
Escape special characters in *pattern*.
|
||||||
This is useful if you want to match an arbitrary literal string that may
|
This is useful if you want to match an arbitrary literal string that may
|
||||||
have regular expression metacharacters in it. For example::
|
have regular expression metacharacters in it. For example::
|
||||||
|
|
||||||
|
@ -795,15 +795,19 @@ form.
|
||||||
|
|
||||||
>>> legal_chars = string.ascii_lowercase + string.digits + "!#$%&'*+-.^_`|~:"
|
>>> legal_chars = string.ascii_lowercase + string.digits + "!#$%&'*+-.^_`|~:"
|
||||||
>>> print('[%s]+' % re.escape(legal_chars))
|
>>> print('[%s]+' % re.escape(legal_chars))
|
||||||
[abcdefghijklmnopqrstuvwxyz0123456789\!\#\$\%\&\'\*\+\-\.\^_\`\|\~\:]+
|
[abcdefghijklmnopqrstuvwxyz0123456789!\#\$%&'\*\+\-\.\^_`\|~:]+
|
||||||
|
|
||||||
>>> operators = ['+', '-', '*', '/', '**']
|
>>> operators = ['+', '-', '*', '/', '**']
|
||||||
>>> print('|'.join(map(re.escape, sorted(operators, reverse=True))))
|
>>> print('|'.join(map(re.escape, sorted(operators, reverse=True))))
|
||||||
\/|\-|\+|\*\*|\*
|
/|\-|\+|\*\*|\*
|
||||||
|
|
||||||
.. versionchanged:: 3.3
|
.. versionchanged:: 3.3
|
||||||
The ``'_'`` character is no longer escaped.
|
The ``'_'`` character is no longer escaped.
|
||||||
|
|
||||||
|
.. versionchanged:: 3.7
|
||||||
|
Only characters that can have special meaning in a regular expression
|
||||||
|
are escaped.
|
||||||
|
|
||||||
|
|
||||||
.. function:: purge()
|
.. function:: purge()
|
||||||
|
|
||||||
|
|
|
@ -303,7 +303,7 @@ whatsnew/3.2,,:gz,">>> with tarfile.open(name='myarchive.tar.gz', mode='w:gz') a
|
||||||
whatsnew/3.2,,:location,zope9-location = ${zope9:location}
|
whatsnew/3.2,,:location,zope9-location = ${zope9:location}
|
||||||
whatsnew/3.2,,:prefix,zope-conf = ${custom:prefix}/etc/zope.conf
|
whatsnew/3.2,,:prefix,zope-conf = ${custom:prefix}/etc/zope.conf
|
||||||
library/re,,`,!#$%&'*+-.^_`|~:
|
library/re,,`,!#$%&'*+-.^_`|~:
|
||||||
library/re,,`,\!\#\$\%\&\'\*\+\-\.\^_\`\|\~\:
|
library/re,,`,!\#\$%&'\*\+\-\.\^_`\|~:
|
||||||
library/tarfile,,:xz,'x:xz'
|
library/tarfile,,:xz,'x:xz'
|
||||||
library/xml.etree.elementtree,,:sometag,prefix:sometag
|
library/xml.etree.elementtree,,:sometag,prefix:sometag
|
||||||
library/xml.etree.elementtree,,:fictional,"<actors xmlns:fictional=""http://characters.example.com"""
|
library/xml.etree.elementtree,,:fictional,"<actors xmlns:fictional=""http://characters.example.com"""
|
||||||
|
|
|
|
@ -221,8 +221,8 @@ class ReplaceDialogTest(unittest.TestCase):
|
||||||
self.assertIn('Invalid Replace Expression', showerror.message)
|
self.assertIn('Invalid Replace Expression', showerror.message)
|
||||||
|
|
||||||
# test access method
|
# test access method
|
||||||
self.engine.setcookedpat("\'")
|
self.engine.setcookedpat("?")
|
||||||
equal(pv.get(), "\\'")
|
equal(pv.get(), "\\?")
|
||||||
|
|
||||||
def test_replace_backwards(self):
|
def test_replace_backwards(self):
|
||||||
equal = self.assertEqual
|
equal = self.assertEqual
|
||||||
|
|
36
Lib/re.py
36
Lib/re.py
|
@ -241,39 +241,21 @@ def template(pattern, flags=0):
|
||||||
"Compile a template pattern, returning a pattern object"
|
"Compile a template pattern, returning a pattern object"
|
||||||
return _compile(pattern, flags|T)
|
return _compile(pattern, flags|T)
|
||||||
|
|
||||||
_alphanum_str = frozenset(
|
# SPECIAL_CHARS
|
||||||
"_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890")
|
# closing ')', '}' and ']'
|
||||||
_alphanum_bytes = frozenset(
|
# '-' (a range in character set)
|
||||||
b"_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890")
|
# '#' (comment) and WHITESPACE (ignored) in verbose mode
|
||||||
|
_special_chars_map = {i: '\\' + chr(i) for i in b'()[]{}?*+-|^$\\.# \t\n\r\v\f'}
|
||||||
|
|
||||||
def escape(pattern):
|
def escape(pattern):
|
||||||
"""
|
"""
|
||||||
Escape all the characters in pattern except ASCII letters, numbers and '_'.
|
Escape special characters in a string.
|
||||||
"""
|
"""
|
||||||
if isinstance(pattern, str):
|
if isinstance(pattern, str):
|
||||||
alphanum = _alphanum_str
|
return pattern.translate(_special_chars_map)
|
||||||
s = list(pattern)
|
|
||||||
for i, c in enumerate(pattern):
|
|
||||||
if c not in alphanum:
|
|
||||||
if c == "\000":
|
|
||||||
s[i] = "\\000"
|
|
||||||
else:
|
else:
|
||||||
s[i] = "\\" + c
|
pattern = str(pattern, 'latin1')
|
||||||
return "".join(s)
|
return pattern.translate(_special_chars_map).encode('latin1')
|
||||||
else:
|
|
||||||
alphanum = _alphanum_bytes
|
|
||||||
s = []
|
|
||||||
esc = ord(b"\\")
|
|
||||||
for c in pattern:
|
|
||||||
if c in alphanum:
|
|
||||||
s.append(c)
|
|
||||||
else:
|
|
||||||
if c == 0:
|
|
||||||
s.extend(b"\\000")
|
|
||||||
else:
|
|
||||||
s.append(esc)
|
|
||||||
s.append(c)
|
|
||||||
return bytes(s)
|
|
||||||
|
|
||||||
# --------------------------------------------------------------------
|
# --------------------------------------------------------------------
|
||||||
# internals
|
# internals
|
||||||
|
|
|
@ -904,7 +904,7 @@ class ReTests(unittest.TestCase):
|
||||||
self.assertEqual(re.search(r"a\s", "a ").group(0), "a ")
|
self.assertEqual(re.search(r"a\s", "a ").group(0), "a ")
|
||||||
|
|
||||||
def assertMatch(self, pattern, text, match=None, span=None,
|
def assertMatch(self, pattern, text, match=None, span=None,
|
||||||
matcher=re.match):
|
matcher=re.fullmatch):
|
||||||
if match is None and span is None:
|
if match is None and span is None:
|
||||||
# the pattern matches the whole text
|
# the pattern matches the whole text
|
||||||
match = text
|
match = text
|
||||||
|
@ -917,37 +917,38 @@ class ReTests(unittest.TestCase):
|
||||||
self.assertEqual(m.group(), match)
|
self.assertEqual(m.group(), match)
|
||||||
self.assertEqual(m.span(), span)
|
self.assertEqual(m.span(), span)
|
||||||
|
|
||||||
|
LITERAL_CHARS = string.ascii_letters + string.digits + '!"%&\',/:;<=>@_`~'
|
||||||
|
|
||||||
def test_re_escape(self):
|
def test_re_escape(self):
|
||||||
alnum_chars = string.ascii_letters + string.digits + '_'
|
|
||||||
p = ''.join(chr(i) for i in range(256))
|
p = ''.join(chr(i) for i in range(256))
|
||||||
for c in p:
|
for c in p:
|
||||||
if c in alnum_chars:
|
|
||||||
self.assertEqual(re.escape(c), c)
|
|
||||||
elif c == '\x00':
|
|
||||||
self.assertEqual(re.escape(c), '\\000')
|
|
||||||
else:
|
|
||||||
self.assertEqual(re.escape(c), '\\' + c)
|
|
||||||
self.assertMatch(re.escape(c), c)
|
self.assertMatch(re.escape(c), c)
|
||||||
|
self.assertMatch('[' + re.escape(c) + ']', c)
|
||||||
|
self.assertMatch('(?x)' + re.escape(c), c)
|
||||||
self.assertMatch(re.escape(p), p)
|
self.assertMatch(re.escape(p), p)
|
||||||
|
for c in '-.]{}':
|
||||||
|
self.assertEqual(re.escape(c)[:1], '\\')
|
||||||
|
literal_chars = self.LITERAL_CHARS
|
||||||
|
self.assertEqual(re.escape(literal_chars), literal_chars)
|
||||||
|
|
||||||
def test_re_escape_byte(self):
|
def test_re_escape_bytes(self):
|
||||||
alnum_chars = (string.ascii_letters + string.digits + '_').encode('ascii')
|
|
||||||
p = bytes(range(256))
|
p = bytes(range(256))
|
||||||
for i in p:
|
for i in p:
|
||||||
b = bytes([i])
|
b = bytes([i])
|
||||||
if b in alnum_chars:
|
|
||||||
self.assertEqual(re.escape(b), b)
|
|
||||||
elif i == 0:
|
|
||||||
self.assertEqual(re.escape(b), b'\\000')
|
|
||||||
else:
|
|
||||||
self.assertEqual(re.escape(b), b'\\' + b)
|
|
||||||
self.assertMatch(re.escape(b), b)
|
self.assertMatch(re.escape(b), b)
|
||||||
|
self.assertMatch(b'[' + re.escape(b) + b']', b)
|
||||||
|
self.assertMatch(b'(?x)' + re.escape(b), b)
|
||||||
self.assertMatch(re.escape(p), p)
|
self.assertMatch(re.escape(p), p)
|
||||||
|
for i in b'-.]{}':
|
||||||
|
b = bytes([i])
|
||||||
|
self.assertEqual(re.escape(b)[:1], b'\\')
|
||||||
|
literal_chars = self.LITERAL_CHARS.encode('ascii')
|
||||||
|
self.assertEqual(re.escape(literal_chars), literal_chars)
|
||||||
|
|
||||||
def test_re_escape_non_ascii(self):
|
def test_re_escape_non_ascii(self):
|
||||||
s = 'xxx\u2620\u2620\u2620xxx'
|
s = 'xxx\u2620\u2620\u2620xxx'
|
||||||
s_escaped = re.escape(s)
|
s_escaped = re.escape(s)
|
||||||
self.assertEqual(s_escaped, 'xxx\\\u2620\\\u2620\\\u2620xxx')
|
self.assertEqual(s_escaped, s)
|
||||||
self.assertMatch(s_escaped, s)
|
self.assertMatch(s_escaped, s)
|
||||||
self.assertMatch('.%s+.' % re.escape('\u2620'), s,
|
self.assertMatch('.%s+.' % re.escape('\u2620'), s,
|
||||||
'x\u2620\u2620\u2620x', (2, 7), re.search)
|
'x\u2620\u2620\u2620x', (2, 7), re.search)
|
||||||
|
@ -955,7 +956,7 @@ class ReTests(unittest.TestCase):
|
||||||
def test_re_escape_non_ascii_bytes(self):
|
def test_re_escape_non_ascii_bytes(self):
|
||||||
b = 'y\u2620y\u2620y'.encode('utf-8')
|
b = 'y\u2620y\u2620y'.encode('utf-8')
|
||||||
b_escaped = re.escape(b)
|
b_escaped = re.escape(b)
|
||||||
self.assertEqual(b_escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
|
self.assertEqual(b_escaped, b)
|
||||||
self.assertMatch(b_escaped, b)
|
self.assertMatch(b_escaped, b)
|
||||||
res = re.findall(re.escape('\u2620'.encode('utf-8')), b)
|
res = re.findall(re.escape('\u2620'.encode('utf-8')), b)
|
||||||
self.assertEqual(len(res), 2)
|
self.assertEqual(len(res), 2)
|
||||||
|
|
|
@ -320,6 +320,8 @@ Library
|
||||||
- bpo-29998: Pickling and copying ImportError now preserves name and path
|
- bpo-29998: Pickling and copying ImportError now preserves name and path
|
||||||
attributes.
|
attributes.
|
||||||
|
|
||||||
|
- bpo-29995: re.escape() now escapes only regex special characters.
|
||||||
|
|
||||||
- bpo-29962: Add math.remainder operation, implementing remainder
|
- bpo-29962: Add math.remainder operation, implementing remainder
|
||||||
as specified in IEEE 754.
|
as specified in IEEE 754.
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue