Issue #23622: Unknown escapes in regular expressions that consist of ``'\'``
and an ASCII letter now raise a deprecation warning and will be forbidden in Python 3.6.
This commit is contained in:
parent
793c14ea29
commit
a54aae0683
|
@ -1138,7 +1138,7 @@ Empty matches are replaced only when they're not adjacent to a previous match.
|
||||||
|
|
||||||
If *replacement* is a string, any backslash escapes in it are processed. That
|
If *replacement* is a string, any backslash escapes in it are processed. That
|
||||||
is, ``\n`` is converted to a single newline character, ``\r`` is converted to a
|
is, ``\n`` is converted to a single newline character, ``\r`` is converted to a
|
||||||
carriage return, and so forth. Unknown escapes such as ``\j`` are left alone.
|
carriage return, and so forth. Unknown escapes such as ``\&`` are left alone.
|
||||||
Backreferences, such as ``\6``, are replaced with the substring matched by the
|
Backreferences, such as ``\6``, are replaced with the substring matched by the
|
||||||
corresponding group in the RE. This lets you incorporate portions of the
|
corresponding group in the RE. This lets you incorporate portions of the
|
||||||
original text in the resulting replacement string.
|
original text in the resulting replacement string.
|
||||||
|
|
|
@ -438,6 +438,10 @@ three digits in length.
|
||||||
.. versionchanged:: 3.3
|
.. versionchanged:: 3.3
|
||||||
The ``'\u'`` and ``'\U'`` escape sequences have been added.
|
The ``'\u'`` and ``'\U'`` escape sequences have been added.
|
||||||
|
|
||||||
|
.. deprecated-removed:: 3.5 3.6
|
||||||
|
Unknown escapes consisting of ``'\'`` and an ASCII letter now raise a
|
||||||
|
deprecation warning and will be forbidden in Python 3.6.
|
||||||
|
|
||||||
|
|
||||||
.. seealso::
|
.. seealso::
|
||||||
|
|
||||||
|
@ -687,7 +691,7 @@ form.
|
||||||
*string* is returned unchanged. *repl* can be a string or a function; if it is
|
*string* is returned unchanged. *repl* can be a string or a function; if it is
|
||||||
a string, any backslash escapes in it are processed. That is, ``\n`` is
|
a string, any backslash escapes in it are processed. That is, ``\n`` is
|
||||||
converted to a single newline character, ``\r`` is converted to a carriage return, and
|
converted to a single newline character, ``\r`` is converted to a carriage return, and
|
||||||
so forth. Unknown escapes such as ``\j`` are left alone. Backreferences, such
|
so forth. Unknown escapes such as ``\&`` are left alone. Backreferences, such
|
||||||
as ``\6``, are replaced with the substring matched by group 6 in the pattern.
|
as ``\6``, are replaced with the substring matched by group 6 in the pattern.
|
||||||
For example:
|
For example:
|
||||||
|
|
||||||
|
@ -732,6 +736,10 @@ form.
|
||||||
.. versionchanged:: 3.5
|
.. versionchanged:: 3.5
|
||||||
Unmatched groups are replaced with an empty string.
|
Unmatched groups are replaced with an empty string.
|
||||||
|
|
||||||
|
.. deprecated-removed:: 3.5 3.6
|
||||||
|
Unknown escapes consisting of ``'\'`` and an ASCII letter now raise a
|
||||||
|
deprecation warning and will be forbidden in Python 3.6.
|
||||||
|
|
||||||
|
|
||||||
.. function:: subn(pattern, repl, string, count=0, flags=0)
|
.. function:: subn(pattern, repl, string, count=0, flags=0)
|
||||||
|
|
||||||
|
|
|
@ -21,6 +21,7 @@ DIGITS = frozenset("0123456789")
|
||||||
|
|
||||||
OCTDIGITS = frozenset("01234567")
|
OCTDIGITS = frozenset("01234567")
|
||||||
HEXDIGITS = frozenset("0123456789abcdefABCDEF")
|
HEXDIGITS = frozenset("0123456789abcdefABCDEF")
|
||||||
|
ASCIILETTERS = frozenset("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
|
||||||
|
|
||||||
WHITESPACE = frozenset(" \t\n\r\v\f")
|
WHITESPACE = frozenset(" \t\n\r\v\f")
|
||||||
|
|
||||||
|
@ -344,6 +345,10 @@ def _class_escape(source, escape):
|
||||||
elif c in DIGITS:
|
elif c in DIGITS:
|
||||||
raise ValueError
|
raise ValueError
|
||||||
if len(escape) == 2:
|
if len(escape) == 2:
|
||||||
|
if c in ASCIILETTERS:
|
||||||
|
import warnings
|
||||||
|
warnings.warn('bad escape %s' % escape,
|
||||||
|
DeprecationWarning, stacklevel=8)
|
||||||
return LITERAL, ord(escape[1])
|
return LITERAL, ord(escape[1])
|
||||||
except ValueError:
|
except ValueError:
|
||||||
pass
|
pass
|
||||||
|
@ -407,6 +412,10 @@ def _escape(source, escape, state):
|
||||||
return GROUPREF, group
|
return GROUPREF, group
|
||||||
raise ValueError
|
raise ValueError
|
||||||
if len(escape) == 2:
|
if len(escape) == 2:
|
||||||
|
if c in ASCIILETTERS:
|
||||||
|
import warnings
|
||||||
|
warnings.warn('bad escape %s' % escape,
|
||||||
|
DeprecationWarning, stacklevel=8)
|
||||||
return LITERAL, ord(escape[1])
|
return LITERAL, ord(escape[1])
|
||||||
except ValueError:
|
except ValueError:
|
||||||
pass
|
pass
|
||||||
|
@ -903,7 +912,10 @@ def parse_template(source, pattern):
|
||||||
try:
|
try:
|
||||||
this = chr(ESCAPES[this][1])
|
this = chr(ESCAPES[this][1])
|
||||||
except KeyError:
|
except KeyError:
|
||||||
pass
|
if c in ASCIILETTERS:
|
||||||
|
import warnings
|
||||||
|
warnings.warn('bad escape %s' % this,
|
||||||
|
DeprecationWarning, stacklevel=5)
|
||||||
lappend(this)
|
lappend(this)
|
||||||
else:
|
else:
|
||||||
lappend(this)
|
lappend(this)
|
||||||
|
|
|
@ -87,7 +87,7 @@ tests = [
|
||||||
(r'[\a][\b][\f][\n][\r][\t][\v]', '\a\b\f\n\r\t\v', SUCCEED, 'found', '\a\b\f\n\r\t\v'),
|
(r'[\a][\b][\f][\n][\r][\t][\v]', '\a\b\f\n\r\t\v', SUCCEED, 'found', '\a\b\f\n\r\t\v'),
|
||||||
# NOTE: not an error under PCRE/PRE:
|
# NOTE: not an error under PCRE/PRE:
|
||||||
(r'\u', '', SYNTAX_ERROR), # A Perl escape
|
(r'\u', '', SYNTAX_ERROR), # A Perl escape
|
||||||
(r'\c\e\g\h\i\j\k\m\o\p\q\y\z', 'ceghijkmopqyz', SUCCEED, 'found', 'ceghijkmopqyz'),
|
# (r'\c\e\g\h\i\j\k\m\o\p\q\y\z', 'ceghijkmopqyz', SUCCEED, 'found', 'ceghijkmopqyz'),
|
||||||
(r'\xff', '\377', SUCCEED, 'found', chr(255)),
|
(r'\xff', '\377', SUCCEED, 'found', chr(255)),
|
||||||
# new \x semantics
|
# new \x semantics
|
||||||
(r'\x00ffffffffffffff', '\377', FAIL, 'found', chr(255)),
|
(r'\x00ffffffffffffff', '\377', FAIL, 'found', chr(255)),
|
||||||
|
@ -607,8 +607,8 @@ xyzabc
|
||||||
# new \x semantics
|
# new \x semantics
|
||||||
(r'\x00ff', '\377', FAIL),
|
(r'\x00ff', '\377', FAIL),
|
||||||
# (r'\x00ff', '\377', SUCCEED, 'found', chr(255)),
|
# (r'\x00ff', '\377', SUCCEED, 'found', chr(255)),
|
||||||
(r'\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'),
|
(r'\t\n\v\r\f\a', '\t\n\v\r\f\a', SUCCEED, 'found', '\t\n\v\r\f\a'),
|
||||||
('\t\n\v\r\f\a\g', '\t\n\v\r\f\ag', SUCCEED, 'found', '\t\n\v\r\f\ag'),
|
('\t\n\v\r\f\a', '\t\n\v\r\f\a', SUCCEED, 'found', '\t\n\v\r\f\a'),
|
||||||
(r'\t\n\v\r\f\a', '\t\n\v\r\f\a', SUCCEED, 'found', chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)),
|
(r'\t\n\v\r\f\a', '\t\n\v\r\f\a', SUCCEED, 'found', chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)),
|
||||||
(r'[\t][\n][\v][\r][\f][\b]', '\t\n\v\r\f\b', SUCCEED, 'found', '\t\n\v\r\f\b'),
|
(r'[\t][\n][\v][\r][\f][\b]', '\t\n\v\r\f\b', SUCCEED, 'found', '\t\n\v\r\f\b'),
|
||||||
|
|
||||||
|
|
|
@ -100,11 +100,14 @@ class ReTests(unittest.TestCase):
|
||||||
self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
|
self.assertEqual(re.sub('(?P<unk>x)', '\g<unk>\g<unk>', 'xx'), 'xxxx')
|
||||||
self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')
|
self.assertEqual(re.sub('(?P<unk>x)', '\g<1>\g<1>', 'xx'), 'xxxx')
|
||||||
|
|
||||||
self.assertEqual(re.sub('a',r'\t\n\v\r\f\a\b\B\Z\a\A\w\W\s\S\d\D','a'),
|
self.assertEqual(re.sub('a', r'\t\n\v\r\f\a\b', 'a'), '\t\n\v\r\f\a\b')
|
||||||
'\t\n\v\r\f\a\b\\B\\Z\a\\A\\w\\W\\s\\S\\d\\D')
|
self.assertEqual(re.sub('a', '\t\n\v\r\f\a\b', 'a'), '\t\n\v\r\f\a\b')
|
||||||
self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'), '\t\n\v\r\f\a')
|
self.assertEqual(re.sub('a', '\t\n\v\r\f\a\b', 'a'),
|
||||||
self.assertEqual(re.sub('a', '\t\n\v\r\f\a', 'a'),
|
(chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)+chr(8)))
|
||||||
(chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)))
|
for c in 'cdehijklmopqsuwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ':
|
||||||
|
with self.subTest(c):
|
||||||
|
with self.assertWarns(DeprecationWarning):
|
||||||
|
self.assertEqual(re.sub('a', '\\' + c, 'a'), '\\' + c)
|
||||||
|
|
||||||
self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
|
self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
|
||||||
|
|
||||||
|
@ -551,14 +554,23 @@ class ReTests(unittest.TestCase):
|
||||||
self.assertEqual(re.match(r"\(", '(').group(), '(')
|
self.assertEqual(re.match(r"\(", '(').group(), '(')
|
||||||
self.assertIsNone(re.match(r"\(", ')'))
|
self.assertIsNone(re.match(r"\(", ')'))
|
||||||
self.assertEqual(re.match(r"\\", '\\').group(), '\\')
|
self.assertEqual(re.match(r"\\", '\\').group(), '\\')
|
||||||
self.assertEqual(re.match(r"\y", 'y').group(), 'y')
|
|
||||||
self.assertIsNone(re.match(r"\y", 'z'))
|
|
||||||
self.assertEqual(re.match(r"[\]]", ']').group(), ']')
|
self.assertEqual(re.match(r"[\]]", ']').group(), ']')
|
||||||
self.assertIsNone(re.match(r"[\]]", '['))
|
self.assertIsNone(re.match(r"[\]]", '['))
|
||||||
self.assertEqual(re.match(r"[a\-c]", '-').group(), '-')
|
self.assertEqual(re.match(r"[a\-c]", '-').group(), '-')
|
||||||
self.assertIsNone(re.match(r"[a\-c]", 'b'))
|
self.assertIsNone(re.match(r"[a\-c]", 'b'))
|
||||||
self.assertEqual(re.match(r"[\^a]+", 'a^').group(), 'a^')
|
self.assertEqual(re.match(r"[\^a]+", 'a^').group(), 'a^')
|
||||||
self.assertIsNone(re.match(r"[\^a]+", 'b'))
|
self.assertIsNone(re.match(r"[\^a]+", 'b'))
|
||||||
|
re.purge() # for warnings
|
||||||
|
for c in 'ceghijklmopqyzCEFGHIJKLMNOPQRTVXY':
|
||||||
|
with self.subTest(c):
|
||||||
|
with self.assertWarns(DeprecationWarning):
|
||||||
|
self.assertEqual(re.fullmatch('\\%c' % c, c).group(), c)
|
||||||
|
self.assertIsNone(re.match('\\%c' % c, 'a'))
|
||||||
|
for c in 'ceghijklmopqyzABCEFGHIJKLMNOPQRTVXYZ':
|
||||||
|
with self.subTest(c):
|
||||||
|
with self.assertWarns(DeprecationWarning):
|
||||||
|
self.assertEqual(re.fullmatch('[\\%c]' % c, c).group(), c)
|
||||||
|
self.assertIsNone(re.match('[\\%c]' % c, 'a'))
|
||||||
|
|
||||||
def test_string_boundaries(self):
|
def test_string_boundaries(self):
|
||||||
# See http://bugs.python.org/issue10713
|
# See http://bugs.python.org/issue10713
|
||||||
|
@ -907,8 +919,10 @@ class ReTests(unittest.TestCase):
|
||||||
self.assertTrue(re.match((r"\x%02x" % i).encode(), bytes([i])))
|
self.assertTrue(re.match((r"\x%02x" % i).encode(), bytes([i])))
|
||||||
self.assertTrue(re.match((r"\x%02x0" % i).encode(), bytes([i])+b"0"))
|
self.assertTrue(re.match((r"\x%02x0" % i).encode(), bytes([i])+b"0"))
|
||||||
self.assertTrue(re.match((r"\x%02xz" % i).encode(), bytes([i])+b"z"))
|
self.assertTrue(re.match((r"\x%02xz" % i).encode(), bytes([i])+b"z"))
|
||||||
self.assertTrue(re.match(br"\u", b'u'))
|
with self.assertWarns(DeprecationWarning):
|
||||||
self.assertTrue(re.match(br"\U", b'U'))
|
self.assertTrue(re.match(br"\u1234", b'u1234'))
|
||||||
|
with self.assertWarns(DeprecationWarning):
|
||||||
|
self.assertTrue(re.match(br"\U00012345", b'U00012345'))
|
||||||
self.assertTrue(re.match(br"\0", b"\000"))
|
self.assertTrue(re.match(br"\0", b"\000"))
|
||||||
self.assertTrue(re.match(br"\08", b"\0008"))
|
self.assertTrue(re.match(br"\08", b"\0008"))
|
||||||
self.assertTrue(re.match(br"\01", b"\001"))
|
self.assertTrue(re.match(br"\01", b"\001"))
|
||||||
|
@ -928,8 +942,10 @@ class ReTests(unittest.TestCase):
|
||||||
self.assertTrue(re.match((r"[\x%02x]" % i).encode(), bytes([i])))
|
self.assertTrue(re.match((r"[\x%02x]" % i).encode(), bytes([i])))
|
||||||
self.assertTrue(re.match((r"[\x%02x0]" % i).encode(), bytes([i])))
|
self.assertTrue(re.match((r"[\x%02x0]" % i).encode(), bytes([i])))
|
||||||
self.assertTrue(re.match((r"[\x%02xz]" % i).encode(), bytes([i])))
|
self.assertTrue(re.match((r"[\x%02xz]" % i).encode(), bytes([i])))
|
||||||
self.assertTrue(re.match(br"[\u]", b'u'))
|
with self.assertWarns(DeprecationWarning):
|
||||||
self.assertTrue(re.match(br"[\U]", b'U'))
|
self.assertTrue(re.match(br"[\u1234]", b'u'))
|
||||||
|
with self.assertWarns(DeprecationWarning):
|
||||||
|
self.assertTrue(re.match(br"[\U00012345]", b'U'))
|
||||||
self.assertRaises(re.error, re.match, br"[\567]", b"")
|
self.assertRaises(re.error, re.match, br"[\567]", b"")
|
||||||
self.assertRaises(re.error, re.match, br"[\911]", b"")
|
self.assertRaises(re.error, re.match, br"[\911]", b"")
|
||||||
self.assertRaises(re.error, re.match, br"[\x1z]", b"")
|
self.assertRaises(re.error, re.match, br"[\x1z]", b"")
|
||||||
|
@ -1304,8 +1320,9 @@ class ReTests(unittest.TestCase):
|
||||||
def test_bug_13899(self):
|
def test_bug_13899(self):
|
||||||
# Issue #13899: re pattern r"[\A]" should work like "A" but matches
|
# Issue #13899: re pattern r"[\A]" should work like "A" but matches
|
||||||
# nothing. Ditto B and Z.
|
# nothing. Ditto B and Z.
|
||||||
self.assertEqual(re.findall(r'[\A\B\b\C\Z]', 'AB\bCZ'),
|
with self.assertWarns(DeprecationWarning):
|
||||||
['A', 'B', '\b', 'C', 'Z'])
|
self.assertEqual(re.findall(r'[\A\B\b\C\Z]', 'AB\bCZ'),
|
||||||
|
['A', 'B', '\b', 'C', 'Z'])
|
||||||
|
|
||||||
@bigmemtest(size=_2G, memuse=1)
|
@bigmemtest(size=_2G, memuse=1)
|
||||||
def test_large_search(self, size):
|
def test_large_search(self, size):
|
||||||
|
|
|
@ -30,6 +30,10 @@ Core and Builtins
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #23622: Unknown escapes in regular expressions that consist of ``'\'``
|
||||||
|
and an ASCII letter now raise a deprecation warning and will be forbidden in
|
||||||
|
Python 3.6.
|
||||||
|
|
||||||
- Issue #23671: string.Template now allows to specify the "self" parameter as
|
- Issue #23671: string.Template now allows to specify the "self" parameter as
|
||||||
keyword argument. string.Formatter now allows to specify the "self" and
|
keyword argument. string.Formatter now allows to specify the "self" and
|
||||||
the "format_string" parameters as keyword arguments.
|
the "format_string" parameters as keyword arguments.
|
||||||
|
|
Loading…
Reference in New Issue