diff --git a/Doc/library/re.rst b/Doc/library/re.rst index d1823aa8081..60ded8b2f4a 100644 --- a/Doc/library/re.rst +++ b/Doc/library/re.rst @@ -521,7 +521,11 @@ form. current locale. The use of this flag is discouraged as the locale mechanism is very unreliable, and it only handles one "culture" at a time anyway; you should use Unicode matching instead, which is the default in Python 3 - for Unicode (str) patterns. + for Unicode (str) patterns. This flag makes sense only with bytes patterns. + + .. deprecated-removed:: 3.5 3.6 + Deprecated the use of :const:`re.LOCALE` with string patterns or + :const:`re.ASCII`. .. data:: M diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py index 8d9a09a860d..1a7d3162532 100644 --- a/Lib/sre_parse.py +++ b/Lib/sre_parse.py @@ -751,6 +751,11 @@ def _parse(source, state): def fix_flags(src, flags): # Check and fix flags according to the type of pattern (str or bytes) if isinstance(src, str): + if flags & SRE_FLAG_LOCALE: + import warnings + warnings.warn("LOCALE flag with a str pattern is deprecated. " + "Will be an error in 3.6", + DeprecationWarning, stacklevel=6) if not flags & SRE_FLAG_ASCII: flags |= SRE_FLAG_UNICODE elif flags & SRE_FLAG_UNICODE: @@ -758,6 +763,11 @@ def fix_flags(src, flags): else: if flags & SRE_FLAG_UNICODE: raise ValueError("can't use UNICODE flag with a bytes pattern") + if flags & SRE_FLAG_LOCALE and flags & SRE_FLAG_ASCII: + import warnings + warnings.warn("ASCII and LOCALE flags are incompatible. " + "Will be an error in 3.6", + DeprecationWarning, stacklevel=6) return flags def parse(str, flags=0, pattern=None): diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index f8b33a48981..6e90b2fec9a 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -502,10 +502,6 @@ class ReTests(unittest.TestCase): "abcd abc bcd bx", re.ASCII).group(1), "bx") self.assertEqual(re.search(r"\B(b.)\B", "abc bcd bc abxd", re.ASCII).group(1), "bx") - self.assertEqual(re.search(r"\b(b.)\b", - "abcd abc bcd bx", re.LOCALE).group(1), "bx") - self.assertEqual(re.search(r"\B(b.)\B", - "abc bcd bc abxd", re.LOCALE).group(1), "bx") self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc") self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc") self.assertIsNone(re.search(r"^\Aabc\Z$", "\nabc\n", re.M)) @@ -526,8 +522,6 @@ class ReTests(unittest.TestCase): b"1aa! a").group(0), b"1aa! a") self.assertEqual(re.search(r"\d\D\w\W\s\S", "1aa! a", re.ASCII).group(0), "1aa! a") - self.assertEqual(re.search(r"\d\D\w\W\s\S", - "1aa! a", re.LOCALE).group(0), "1aa! a") self.assertEqual(re.search(br"\d\D\w\W\s\S", b"1aa! a", re.LOCALE).group(0), b"1aa! a") @@ -693,9 +687,12 @@ class ReTests(unittest.TestCase): self.assertEqual(_sre.getlower(ord('A'), 0), ord('a')) self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a')) self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a')) + self.assertEqual(_sre.getlower(ord('A'), re.ASCII), ord('a')) self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC") self.assertEqual(re.match(b"abc", b"ABC", re.I).group(0), b"ABC") + self.assertEqual(re.match("abc", "ABC", re.I|re.A).group(0), "ABC") + self.assertEqual(re.match(b"abc", b"ABC", re.I|re.L).group(0), b"ABC") def test_not_literal(self): self.assertEqual(re.search("\s([^a])", " b").group(1), "b") @@ -780,8 +777,10 @@ class ReTests(unittest.TestCase): self.assertEqual(re.X, re.VERBOSE) def test_flags(self): - for flag in [re.I, re.M, re.X, re.S, re.L]: + for flag in [re.I, re.M, re.X, re.S, re.A, re.U]: self.assertTrue(re.compile('^pattern$', flag)) + for flag in [re.I, re.M, re.X, re.S, re.A, re.L]: + self.assertTrue(re.compile(b'^pattern$', flag)) def test_sre_character_literals(self): for i in [0, 8, 16, 32, 64, 127, 128, 255, 256, 0xFFFF, 0x10000, 0x10FFFF]: @@ -1146,6 +1145,52 @@ class ReTests(unittest.TestCase): self.assertRaises(ValueError, re.compile, '(?a)\w', re.UNICODE) self.assertRaises(ValueError, re.compile, '(?au)\w') + def test_locale_flag(self): + import locale + _, enc = locale.getlocale(locale.LC_CTYPE) + # Search non-ASCII letter + for i in range(128, 256): + try: + c = bytes([i]).decode(enc) + sletter = c.lower() + if sletter == c: continue + bletter = sletter.encode(enc) + if len(bletter) != 1: continue + if bletter.decode(enc) != sletter: continue + bpat = re.escape(bytes([i])) + break + except (UnicodeError, TypeError): + pass + else: + bletter = None + bpat = b'A' + # Bytes patterns + pat = re.compile(bpat, re.LOCALE | re.IGNORECASE) + if bletter: + self.assertTrue(pat.match(bletter)) + pat = re.compile(b'(?L)' + bpat, re.IGNORECASE) + if bletter: + self.assertTrue(pat.match(bletter)) + pat = re.compile(bpat, re.IGNORECASE) + if bletter: + self.assertIsNone(pat.match(bletter)) + pat = re.compile(b'\w', re.LOCALE) + if bletter: + self.assertTrue(pat.match(bletter)) + pat = re.compile(b'(?L)\w') + if bletter: + self.assertTrue(pat.match(bletter)) + pat = re.compile(b'\w') + if bletter: + self.assertIsNone(pat.match(bletter)) + # Incompatibilities + self.assertWarns(DeprecationWarning, re.compile, '', re.LOCALE) + self.assertWarns(DeprecationWarning, re.compile, '(?L)') + self.assertWarns(DeprecationWarning, re.compile, b'', re.LOCALE | re.ASCII) + self.assertWarns(DeprecationWarning, re.compile, b'(?L)', re.ASCII) + self.assertWarns(DeprecationWarning, re.compile, b'(?a)', re.LOCALE) + self.assertWarns(DeprecationWarning, re.compile, b'(?aL)') + def test_bug_6509(self): # Replacement strings of both types must parse properly. # all strings @@ -1477,6 +1522,10 @@ class PatternReprTests(unittest.TestCase): self.check_flags(b'bytes pattern', re.A, "re.compile(b'bytes pattern', re.ASCII)") + def test_locale(self): + self.check_flags(b'bytes pattern', re.L, + "re.compile(b'bytes pattern', re.LOCALE)") + def test_quotes(self): self.check('random "double quoted" pattern', '''re.compile('random "double quoted" pattern')''') @@ -1590,8 +1639,16 @@ class ExternalTests(unittest.TestCase): pass else: with self.subTest('bytes pattern match'): - bpat = re.compile(bpat) - self.assertTrue(bpat.search(bs)) + obj = re.compile(bpat) + self.assertTrue(obj.search(bs)) + + # Try the match with LOCALE enabled, and check that it + # still succeeds. + with self.subTest('locale-sensitive match'): + obj = re.compile(bpat, re.LOCALE) + result = obj.search(bs) + if result is None: + print('=== Fails on locale-sensitive match', t) # Try the match with the search area limited to the extent # of the match and see if it still succeeds. \B will @@ -1609,13 +1666,6 @@ class ExternalTests(unittest.TestCase): obj = re.compile(pattern, re.IGNORECASE) self.assertTrue(obj.search(s)) - # Try the match with LOCALE enabled, and check that it - # still succeeds. - if '(?u)' not in pattern: - with self.subTest('locale-sensitive match'): - obj = re.compile(pattern, re.LOCALE) - self.assertTrue(obj.search(s)) - # Try the match with UNICODE locale enabled, and check # that it still succeeds. with self.subTest('unicode-sensitive match'): diff --git a/Misc/NEWS b/Misc/NEWS index d3a02a6feb9..faac0924069 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -191,6 +191,9 @@ Core and Builtins Library ------- +- Issue #21032: Deprecated the use of re.LOCALE flag with str patterns or + re.ASCII. It was newer worked. + - Issue #22902: The "ip" command is now used on Linux to determine MAC address in uuid.getnode(). Pach by Bruno Cauet.