diff --git a/Doc/library/re.rst b/Doc/library/re.rst index ceb795976d5..52d2ec0ac6d 100644 --- a/Doc/library/re.rst +++ b/Doc/library/re.rst @@ -317,8 +317,9 @@ The special characters are: The special sequences consist of ``'\'`` and a character from the list below. -If the ordinary character is not on the list, then the resulting RE will match -the second character. For example, ``\$`` matches the character ``'$'``. +If the ordinary character is not ASCII digit or ASCII letter, then the +resulting RE will match the second character. For example, ``\$`` matches the +character ``'$'``. ``\number`` Matches the contents of the group of the same number. Groups are numbered @@ -438,9 +439,8 @@ three digits in length. .. versionchanged:: 3.3 The ``'\u'`` and ``'\U'`` escape sequences have been added. -.. deprecated-removed:: 3.5 3.6 - Unknown escapes consist of ``'\'`` and ASCII letter now raise a - deprecation warning and will be forbidden in Python 3.6. +.. versionchanged:: 3.6 + Unknown escapes consisting of ``'\'`` and ASCII letter now are errors. .. seealso:: @@ -528,11 +528,11 @@ form. current locale. The use of this flag is discouraged as the locale mechanism is very unreliable, and it only handles one "culture" at a time anyway; you should use Unicode matching instead, which is the default in Python 3 - for Unicode (str) patterns. This flag makes sense only with bytes patterns. + for Unicode (str) patterns. This flag can be used only with bytes patterns. - .. deprecated-removed:: 3.5 3.6 - Deprecated the use of :const:`re.LOCALE` with string patterns or - :const:`re.ASCII`. + .. versionchanged:: 3.6 + :const:`re.LOCALE` can be used only with bytes patterns and is + not compatible with :const:`re.ASCII`. .. data:: M @@ -738,9 +738,8 @@ form. .. versionchanged:: 3.5 Unmatched groups are replaced with an empty string. - .. deprecated-removed:: 3.5 3.6 - Unknown escapes consist of ``'\'`` and ASCII letter now raise a - deprecation warning and will be forbidden in Python 3.6. + .. versionchanged:: 3.6 + Unknown escapes consisting of ``'\'`` and ASCII letter now are errors. .. function:: subn(pattern, repl, string, count=0, flags=0) diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py index 4ff50d1006a..521e379e720 100644 --- a/Lib/sre_parse.py +++ b/Lib/sre_parse.py @@ -282,33 +282,6 @@ class Tokenizer: def error(self, msg, offset=0): return error(msg, self.string, self.tell() - offset) -# The following three functions are not used in this module anymore, but we keep -# them here (with DeprecationWarnings) for backwards compatibility. - -def isident(char): - import warnings - warnings.warn('sre_parse.isident() will be removed in 3.5', - DeprecationWarning, stacklevel=2) - return "a" <= char <= "z" or "A" <= char <= "Z" or char == "_" - -def isdigit(char): - import warnings - warnings.warn('sre_parse.isdigit() will be removed in 3.5', - DeprecationWarning, stacklevel=2) - return "0" <= char <= "9" - -def isname(name): - import warnings - warnings.warn('sre_parse.isname() will be removed in 3.5', - DeprecationWarning, stacklevel=2) - # check that group name is a valid string - if not isident(name[0]): - return False - for char in name[1:]: - if not isident(char) and not isdigit(char): - return False - return True - def _class_escape(source, escape): # handle escape code inside character class code = ESCAPES.get(escape) @@ -351,9 +324,7 @@ def _class_escape(source, escape): raise ValueError if len(escape) == 2: if c in ASCIILETTERS: - import warnings - warnings.warn('bad escape %s' % escape, - DeprecationWarning, stacklevel=8) + raise source.error('bad escape %s' % escape, len(escape)) return LITERAL, ord(escape[1]) except ValueError: pass @@ -418,9 +389,7 @@ def _escape(source, escape, state): raise source.error("invalid group reference", len(escape)) if len(escape) == 2: if c in ASCIILETTERS: - import warnings - warnings.warn('bad escape %s' % escape, - DeprecationWarning, stacklevel=8) + raise source.error("bad escape %s" % escape, len(escape)) return LITERAL, ord(escape[1]) except ValueError: pass @@ -798,10 +767,7 @@ def fix_flags(src, flags): # Check and fix flags according to the type of pattern (str or bytes) if isinstance(src, str): if flags & SRE_FLAG_LOCALE: - import warnings - warnings.warn("LOCALE flag with a str pattern is deprecated. " - "Will be an error in 3.6", - DeprecationWarning, stacklevel=6) + raise ValueError("cannot use LOCALE flag with a str pattern") if not flags & SRE_FLAG_ASCII: flags |= SRE_FLAG_UNICODE elif flags & SRE_FLAG_UNICODE: @@ -810,10 +776,7 @@ def fix_flags(src, flags): if flags & SRE_FLAG_UNICODE: raise ValueError("cannot use UNICODE flag with a bytes pattern") if flags & SRE_FLAG_LOCALE and flags & SRE_FLAG_ASCII: - import warnings - warnings.warn("ASCII and LOCALE flags are incompatible. " - "Will be an error in 3.6", - DeprecationWarning, stacklevel=6) + raise ValueError("ASCII and LOCALE flags are incompatible") return flags def parse(str, flags=0, pattern=None): @@ -914,9 +877,7 @@ def parse_template(source, pattern): this = chr(ESCAPES[this][1]) except KeyError: if c in ASCIILETTERS: - import warnings - warnings.warn('bad escape %s' % this, - DeprecationWarning, stacklevel=4) + raise s.error('bad escape %s' % this, len(this)) lappend(this) else: lappend(this) diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index 7a741416b48..e27591c4fc0 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -124,7 +124,7 @@ class ReTests(unittest.TestCase): (chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)+chr(8))) for c in 'cdehijklmopqsuwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ': with self.subTest(c): - with self.assertWarns(DeprecationWarning): + with self.assertRaises(re.error): self.assertEqual(re.sub('a', '\\' + c, 'a'), '\\' + c) self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest') @@ -633,14 +633,10 @@ class ReTests(unittest.TestCase): re.purge() # for warnings for c in 'ceghijklmopqyzCEFGHIJKLMNOPQRTVXY': with self.subTest(c): - with self.assertWarns(DeprecationWarning): - self.assertEqual(re.fullmatch('\\%c' % c, c).group(), c) - self.assertIsNone(re.match('\\%c' % c, 'a')) + self.assertRaises(re.error, re.compile, '\\%c' % c) for c in 'ceghijklmopqyzABCEFGHIJKLMNOPQRTVXYZ': with self.subTest(c): - with self.assertWarns(DeprecationWarning): - self.assertEqual(re.fullmatch('[\\%c]' % c, c).group(), c) - self.assertIsNone(re.match('[\\%c]' % c, 'a')) + self.assertRaises(re.error, re.compile, '[\\%c]' % c) def test_string_boundaries(self): # See http://bugs.python.org/issue10713 @@ -993,10 +989,8 @@ class ReTests(unittest.TestCase): self.assertTrue(re.match((r"\x%02x" % i).encode(), bytes([i]))) self.assertTrue(re.match((r"\x%02x0" % i).encode(), bytes([i])+b"0")) self.assertTrue(re.match((r"\x%02xz" % i).encode(), bytes([i])+b"z")) - with self.assertWarns(DeprecationWarning): - self.assertTrue(re.match(br"\u1234", b'u1234')) - with self.assertWarns(DeprecationWarning): - self.assertTrue(re.match(br"\U00012345", b'U00012345')) + self.assertRaises(re.error, re.compile, br"\u1234") + self.assertRaises(re.error, re.compile, br"\U00012345") self.assertTrue(re.match(br"\0", b"\000")) self.assertTrue(re.match(br"\08", b"\0008")) self.assertTrue(re.match(br"\01", b"\001")) @@ -1018,10 +1012,8 @@ class ReTests(unittest.TestCase): self.assertTrue(re.match((r"[\x%02x]" % i).encode(), bytes([i]))) self.assertTrue(re.match((r"[\x%02x0]" % i).encode(), bytes([i]))) self.assertTrue(re.match((r"[\x%02xz]" % i).encode(), bytes([i]))) - with self.assertWarns(DeprecationWarning): - self.assertTrue(re.match(br"[\u1234]", b'u')) - with self.assertWarns(DeprecationWarning): - self.assertTrue(re.match(br"[\U00012345]", b'U')) + self.assertRaises(re.error, re.compile, br"[\u1234]") + self.assertRaises(re.error, re.compile, br"[\U00012345]") self.checkPatternError(br"[\567]", r'octal escape value \567 outside of ' r'range 0-0o377', 1) @@ -1363,12 +1355,12 @@ class ReTests(unittest.TestCase): if bletter: self.assertIsNone(pat.match(bletter)) # Incompatibilities - self.assertWarns(DeprecationWarning, re.compile, '', re.LOCALE) - self.assertWarns(DeprecationWarning, re.compile, '(?L)') - self.assertWarns(DeprecationWarning, re.compile, b'', re.LOCALE | re.ASCII) - self.assertWarns(DeprecationWarning, re.compile, b'(?L)', re.ASCII) - self.assertWarns(DeprecationWarning, re.compile, b'(?a)', re.LOCALE) - self.assertWarns(DeprecationWarning, re.compile, b'(?aL)') + self.assertRaises(ValueError, re.compile, '', re.LOCALE) + self.assertRaises(ValueError, re.compile, '(?L)') + self.assertRaises(ValueError, re.compile, b'', re.LOCALE | re.ASCII) + self.assertRaises(ValueError, re.compile, b'(?L)', re.ASCII) + self.assertRaises(ValueError, re.compile, b'(?a)', re.LOCALE) + self.assertRaises(ValueError, re.compile, b'(?aL)') def test_bug_6509(self): # Replacement strings of both types must parse properly. @@ -1419,13 +1411,6 @@ class ReTests(unittest.TestCase): # Test behaviour when not given a string or pattern as parameter self.assertRaises(TypeError, re.compile, 0) - def test_bug_13899(self): - # Issue #13899: re pattern r"[\A]" should work like "A" but matches - # nothing. Ditto B and Z. - with self.assertWarns(DeprecationWarning): - self.assertEqual(re.findall(r'[\A\B\b\C\Z]', 'AB\bCZ'), - ['A', 'B', '\b', 'C', 'Z']) - @bigmemtest(size=_2G, memuse=1) def test_large_search(self, size): # Issue #10182: indices were 32-bit-truncated. diff --git a/Misc/NEWS b/Misc/NEWS index 9e95932dbf2..b5cf3148923 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -38,6 +38,9 @@ Core and Builtins Library ------- +- Issue #27030: Unknown escapes consisting of ``'\'`` and ASCII letter in + regular expressions now are errors. + - Issue #27186: Add os.PathLike support to DirEntry (part of PEP 519). Initial patch by Jelle Zijlstra.