Issue #27030: Unknown escapes consisting of ``'\'`` and an ASCII letter in
regular expressions are now errors.
This commit is contained in:
parent
d35bf03284
commit
9bd85b83f6
|
@ -317,8 +317,9 @@ The special characters are:
|
|||
|
||||
|
||||
The special sequences consist of ``'\'`` and a character from the list below.
|
||||
If the ordinary character is not on the list, then the resulting RE will match
|
||||
the second character. For example, ``\$`` matches the character ``'$'``.
|
||||
If the ordinary character is not an ASCII digit or an ASCII letter, then the
|
||||
resulting RE will match the second character. For example, ``\$`` matches the
|
||||
character ``'$'``.
|
||||
|
||||
``\number``
|
||||
Matches the contents of the group of the same number. Groups are numbered
|
||||
|
@ -438,9 +439,8 @@ three digits in length.
|
|||
.. versionchanged:: 3.3
|
||||
The ``'\u'`` and ``'\U'`` escape sequences have been added.
|
||||
|
||||
.. deprecated-removed:: 3.5 3.6
|
||||
Unknown escapes consisting of ``'\'`` and an ASCII letter now raise a
|
||||
deprecation warning and will be forbidden in Python 3.6.
|
||||
.. versionchanged:: 3.6
|
||||
Unknown escapes consisting of ``'\'`` and an ASCII letter are now errors.
|
||||
|
||||
|
||||
.. seealso::
|
||||
|
@ -528,11 +528,11 @@ form.
|
|||
current locale. The use of this flag is discouraged as the locale mechanism
|
||||
is very unreliable, and it only handles one "culture" at a time anyway;
|
||||
you should use Unicode matching instead, which is the default in Python 3
|
||||
for Unicode (str) patterns. This flag makes sense only with bytes patterns.
|
||||
for Unicode (str) patterns. This flag can be used only with bytes patterns.
|
||||
|
||||
.. deprecated-removed:: 3.5 3.6
|
||||
Deprecated the use of :const:`re.LOCALE` with string patterns or
|
||||
:const:`re.ASCII`.
|
||||
.. versionchanged:: 3.6
|
||||
:const:`re.LOCALE` can be used only with bytes patterns and is
|
||||
not compatible with :const:`re.ASCII`.
|
||||
|
||||
|
||||
.. data:: M
|
||||
|
@ -738,9 +738,8 @@ form.
|
|||
.. versionchanged:: 3.5
|
||||
Unmatched groups are replaced with an empty string.
|
||||
|
||||
.. deprecated-removed:: 3.5 3.6
|
||||
Unknown escapes consisting of ``'\'`` and an ASCII letter now raise a
|
||||
deprecation warning and will be forbidden in Python 3.6.
|
||||
.. versionchanged:: 3.6
|
||||
Unknown escapes consisting of ``'\'`` and an ASCII letter are now errors.
|
||||
|
||||
|
||||
.. function:: subn(pattern, repl, string, count=0, flags=0)
|
||||
|
|
|
@ -282,33 +282,6 @@ class Tokenizer:
|
|||
def error(self, msg, offset=0):
|
||||
return error(msg, self.string, self.tell() - offset)
|
||||
|
||||
# The following three functions are not used in this module anymore, but we keep
|
||||
# them here (with DeprecationWarnings) for backwards compatibility.
|
||||
|
||||
def isident(char):
|
||||
import warnings
|
||||
warnings.warn('sre_parse.isident() will be removed in 3.5',
|
||||
DeprecationWarning, stacklevel=2)
|
||||
return "a" <= char <= "z" or "A" <= char <= "Z" or char == "_"
|
||||
|
||||
def isdigit(char):
|
||||
import warnings
|
||||
warnings.warn('sre_parse.isdigit() will be removed in 3.5',
|
||||
DeprecationWarning, stacklevel=2)
|
||||
return "0" <= char <= "9"
|
||||
|
||||
def isname(name):
|
||||
import warnings
|
||||
warnings.warn('sre_parse.isname() will be removed in 3.5',
|
||||
DeprecationWarning, stacklevel=2)
|
||||
# check that group name is a valid string
|
||||
if not isident(name[0]):
|
||||
return False
|
||||
for char in name[1:]:
|
||||
if not isident(char) and not isdigit(char):
|
||||
return False
|
||||
return True
|
||||
|
||||
def _class_escape(source, escape):
|
||||
# handle escape code inside character class
|
||||
code = ESCAPES.get(escape)
|
||||
|
@ -351,9 +324,7 @@ def _class_escape(source, escape):
|
|||
raise ValueError
|
||||
if len(escape) == 2:
|
||||
if c in ASCIILETTERS:
|
||||
import warnings
|
||||
warnings.warn('bad escape %s' % escape,
|
||||
DeprecationWarning, stacklevel=8)
|
||||
raise source.error('bad escape %s' % escape, len(escape))
|
||||
return LITERAL, ord(escape[1])
|
||||
except ValueError:
|
||||
pass
|
||||
|
@ -418,9 +389,7 @@ def _escape(source, escape, state):
|
|||
raise source.error("invalid group reference", len(escape))
|
||||
if len(escape) == 2:
|
||||
if c in ASCIILETTERS:
|
||||
import warnings
|
||||
warnings.warn('bad escape %s' % escape,
|
||||
DeprecationWarning, stacklevel=8)
|
||||
raise source.error("bad escape %s" % escape, len(escape))
|
||||
return LITERAL, ord(escape[1])
|
||||
except ValueError:
|
||||
pass
|
||||
|
@ -798,10 +767,7 @@ def fix_flags(src, flags):
|
|||
# Check and fix flags according to the type of pattern (str or bytes)
|
||||
if isinstance(src, str):
|
||||
if flags & SRE_FLAG_LOCALE:
|
||||
import warnings
|
||||
warnings.warn("LOCALE flag with a str pattern is deprecated. "
|
||||
"Will be an error in 3.6",
|
||||
DeprecationWarning, stacklevel=6)
|
||||
raise ValueError("cannot use LOCALE flag with a str pattern")
|
||||
if not flags & SRE_FLAG_ASCII:
|
||||
flags |= SRE_FLAG_UNICODE
|
||||
elif flags & SRE_FLAG_UNICODE:
|
||||
|
@ -810,10 +776,7 @@ def fix_flags(src, flags):
|
|||
if flags & SRE_FLAG_UNICODE:
|
||||
raise ValueError("cannot use UNICODE flag with a bytes pattern")
|
||||
if flags & SRE_FLAG_LOCALE and flags & SRE_FLAG_ASCII:
|
||||
import warnings
|
||||
warnings.warn("ASCII and LOCALE flags are incompatible. "
|
||||
"Will be an error in 3.6",
|
||||
DeprecationWarning, stacklevel=6)
|
||||
raise ValueError("ASCII and LOCALE flags are incompatible")
|
||||
return flags
|
||||
|
||||
def parse(str, flags=0, pattern=None):
|
||||
|
@ -914,9 +877,7 @@ def parse_template(source, pattern):
|
|||
this = chr(ESCAPES[this][1])
|
||||
except KeyError:
|
||||
if c in ASCIILETTERS:
|
||||
import warnings
|
||||
warnings.warn('bad escape %s' % this,
|
||||
DeprecationWarning, stacklevel=4)
|
||||
raise s.error('bad escape %s' % this, len(this))
|
||||
lappend(this)
|
||||
else:
|
||||
lappend(this)
|
||||
|
|
|
@ -124,7 +124,7 @@ class ReTests(unittest.TestCase):
|
|||
(chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)+chr(8)))
|
||||
for c in 'cdehijklmopqsuwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ':
|
||||
with self.subTest(c):
|
||||
with self.assertWarns(DeprecationWarning):
|
||||
with self.assertRaises(re.error):
|
||||
self.assertEqual(re.sub('a', '\\' + c, 'a'), '\\' + c)
|
||||
|
||||
self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
|
||||
|
@ -633,14 +633,10 @@ class ReTests(unittest.TestCase):
|
|||
re.purge() # for warnings
|
||||
for c in 'ceghijklmopqyzCEFGHIJKLMNOPQRTVXY':
|
||||
with self.subTest(c):
|
||||
with self.assertWarns(DeprecationWarning):
|
||||
self.assertEqual(re.fullmatch('\\%c' % c, c).group(), c)
|
||||
self.assertIsNone(re.match('\\%c' % c, 'a'))
|
||||
self.assertRaises(re.error, re.compile, '\\%c' % c)
|
||||
for c in 'ceghijklmopqyzABCEFGHIJKLMNOPQRTVXYZ':
|
||||
with self.subTest(c):
|
||||
with self.assertWarns(DeprecationWarning):
|
||||
self.assertEqual(re.fullmatch('[\\%c]' % c, c).group(), c)
|
||||
self.assertIsNone(re.match('[\\%c]' % c, 'a'))
|
||||
self.assertRaises(re.error, re.compile, '[\\%c]' % c)
|
||||
|
||||
def test_string_boundaries(self):
|
||||
# See http://bugs.python.org/issue10713
|
||||
|
@ -993,10 +989,8 @@ class ReTests(unittest.TestCase):
|
|||
self.assertTrue(re.match((r"\x%02x" % i).encode(), bytes([i])))
|
||||
self.assertTrue(re.match((r"\x%02x0" % i).encode(), bytes([i])+b"0"))
|
||||
self.assertTrue(re.match((r"\x%02xz" % i).encode(), bytes([i])+b"z"))
|
||||
with self.assertWarns(DeprecationWarning):
|
||||
self.assertTrue(re.match(br"\u1234", b'u1234'))
|
||||
with self.assertWarns(DeprecationWarning):
|
||||
self.assertTrue(re.match(br"\U00012345", b'U00012345'))
|
||||
self.assertRaises(re.error, re.compile, br"\u1234")
|
||||
self.assertRaises(re.error, re.compile, br"\U00012345")
|
||||
self.assertTrue(re.match(br"\0", b"\000"))
|
||||
self.assertTrue(re.match(br"\08", b"\0008"))
|
||||
self.assertTrue(re.match(br"\01", b"\001"))
|
||||
|
@ -1018,10 +1012,8 @@ class ReTests(unittest.TestCase):
|
|||
self.assertTrue(re.match((r"[\x%02x]" % i).encode(), bytes([i])))
|
||||
self.assertTrue(re.match((r"[\x%02x0]" % i).encode(), bytes([i])))
|
||||
self.assertTrue(re.match((r"[\x%02xz]" % i).encode(), bytes([i])))
|
||||
with self.assertWarns(DeprecationWarning):
|
||||
self.assertTrue(re.match(br"[\u1234]", b'u'))
|
||||
with self.assertWarns(DeprecationWarning):
|
||||
self.assertTrue(re.match(br"[\U00012345]", b'U'))
|
||||
self.assertRaises(re.error, re.compile, br"[\u1234]")
|
||||
self.assertRaises(re.error, re.compile, br"[\U00012345]")
|
||||
self.checkPatternError(br"[\567]",
|
||||
r'octal escape value \567 outside of '
|
||||
r'range 0-0o377', 1)
|
||||
|
@ -1363,12 +1355,12 @@ class ReTests(unittest.TestCase):
|
|||
if bletter:
|
||||
self.assertIsNone(pat.match(bletter))
|
||||
# Incompatibilities
|
||||
self.assertWarns(DeprecationWarning, re.compile, '', re.LOCALE)
|
||||
self.assertWarns(DeprecationWarning, re.compile, '(?L)')
|
||||
self.assertWarns(DeprecationWarning, re.compile, b'', re.LOCALE | re.ASCII)
|
||||
self.assertWarns(DeprecationWarning, re.compile, b'(?L)', re.ASCII)
|
||||
self.assertWarns(DeprecationWarning, re.compile, b'(?a)', re.LOCALE)
|
||||
self.assertWarns(DeprecationWarning, re.compile, b'(?aL)')
|
||||
self.assertRaises(ValueError, re.compile, '', re.LOCALE)
|
||||
self.assertRaises(ValueError, re.compile, '(?L)')
|
||||
self.assertRaises(ValueError, re.compile, b'', re.LOCALE | re.ASCII)
|
||||
self.assertRaises(ValueError, re.compile, b'(?L)', re.ASCII)
|
||||
self.assertRaises(ValueError, re.compile, b'(?a)', re.LOCALE)
|
||||
self.assertRaises(ValueError, re.compile, b'(?aL)')
|
||||
|
||||
def test_bug_6509(self):
|
||||
# Replacement strings of both types must parse properly.
|
||||
|
@ -1419,13 +1411,6 @@ class ReTests(unittest.TestCase):
|
|||
# Test behaviour when not given a string or pattern as parameter
|
||||
self.assertRaises(TypeError, re.compile, 0)
|
||||
|
||||
def test_bug_13899(self):
|
||||
# Issue #13899: re pattern r"[\A]" should work like "A" but matches
|
||||
# nothing. Ditto B and Z.
|
||||
with self.assertWarns(DeprecationWarning):
|
||||
self.assertEqual(re.findall(r'[\A\B\b\C\Z]', 'AB\bCZ'),
|
||||
['A', 'B', '\b', 'C', 'Z'])
|
||||
|
||||
@bigmemtest(size=_2G, memuse=1)
|
||||
def test_large_search(self, size):
|
||||
# Issue #10182: indices were 32-bit-truncated.
|
||||
|
|
Loading…
Reference in New Issue