Issue #22410: Module-level functions in the re module now take the current
locale into account when caching compiled locale-dependent regular expressions.
This commit is contained in:
parent
65566984b0
commit
d4c7290368
16
Lib/re.py
16
Lib/re.py
|
@ -104,6 +104,7 @@ This module also defines an exception 'error'.
|
|||
import sys
|
||||
import sre_compile
|
||||
import sre_parse
|
||||
import _locale
|
||||
|
||||
# public symbols
|
||||
__all__ = [ "match", "search", "sub", "subn", "split", "findall",
|
||||
|
@ -229,9 +230,12 @@ def _compile(*key):
|
|||
bypass_cache = flags & DEBUG
|
||||
if not bypass_cache:
|
||||
cachekey = (type(key[0]),) + key
|
||||
p = _cache.get(cachekey)
|
||||
if p is not None:
|
||||
return p
|
||||
try:
|
||||
p, loc = _cache[cachekey]
|
||||
if loc is None or loc == _locale.setlocale(_locale.LC_CTYPE):
|
||||
return p
|
||||
except KeyError:
|
||||
pass
|
||||
if isinstance(pattern, _pattern_type):
|
||||
if flags:
|
||||
raise ValueError('Cannot process flags argument with a compiled pattern')
|
||||
|
@ -245,7 +249,11 @@ def _compile(*key):
|
|||
if not bypass_cache:
|
||||
if len(_cache) >= _MAXCACHE:
|
||||
_cache.clear()
|
||||
_cache[cachekey] = p
|
||||
if p.flags & LOCALE:
|
||||
loc = _locale.setlocale(_locale.LC_CTYPE)
|
||||
else:
|
||||
loc = None
|
||||
_cache[cachekey] = p, loc
|
||||
return p
|
||||
|
||||
def _compile_repl(*key):
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
from test.test_support import verbose, run_unittest, import_module
|
||||
from test.test_support import precisionbigmemtest, _2G, cpython_only
|
||||
from test.test_support import captured_stdout, have_unicode, requires_unicode, u
|
||||
import locale
|
||||
import re
|
||||
from re import Scanner
|
||||
import sre_constants
|
||||
|
@ -975,6 +976,42 @@ subpattern None
|
|||
self.assertEqual(re.match("(foo)", "foo").group(1L), "foo")
|
||||
self.assertRaises(IndexError, re.match("", "").group, sys.maxint + 1)
|
||||
|
||||
def test_locale_caching(self):
    """Regression test for issue #22410.

    Runs the two locale-specific check helpers in both orders, purging
    the re module's cache before each round, so that a pattern compiled
    under one LC_CTYPE locale is looked up again under the other.  The
    test is skipped when either locale cannot be set on this system.
    """
    # Issue #22410
    saved_ctype = locale.setlocale(locale.LC_CTYPE)
    # Put the original LC_CTYPE back no matter how the test ends.
    self.addCleanup(locale.setlocale, locale.LC_CTYPE, saved_ctype)

    required_locales = ('en_US.iso88591', 'en_US.utf8')
    for name in required_locales:
        try:
            locale.setlocale(locale.LC_CTYPE, name)
        except locale.Error:
            # Unsupported locale on this system
            self.skipTest('test needs %s locale' % name)

    # Each round starts from a purged cache; the second helper of a round
    # then reuses the patterns that the first one compiled.
    rounds = (
        (self.check_en_US_iso88591, self.check_en_US_utf8),
        (self.check_en_US_utf8, self.check_en_US_iso88591),
    )
    for check_first, check_second in rounds:
        re.purge()
        check_first()
        check_second()
|
||||
|
||||
def check_en_US_iso88591(self):
    """Assert case-insensitive LOCALE matching under en_US.iso88591.

    Sets LC_CTYPE to 'en_US.iso88591' (and leaves it set).  Every pattern
    below is expected to match, whether LOCALE|IGNORECASE is passed via
    the flags argument or written inline as ``(?Li)``.  Bytes 0xC5 and
    0xE5 are the upper- and lower-case A-with-ring in ISO 8859-1.
    """
    locale.setlocale(locale.LC_CTYPE, 'en_US.iso88591')
    locale_icase = re.L | re.I

    # (pattern, subject) pairs matched with explicit flags.
    with_flags = (
        (b'\xc5\xe5', b'\xc5\xe5'),
        (b'\xc5', b'\xe5'),
        (b'\xe5', b'\xc5'),
    )
    # The same pairs with the flags embedded in the pattern.
    with_inline = (
        (b'(?Li)\xc5\xe5', b'\xc5\xe5'),
        (b'(?Li)\xc5', b'\xe5'),
        (b'(?Li)\xe5', b'\xc5'),
    )

    for pattern, subject in with_flags:
        self.assertTrue(re.match(pattern, subject, locale_icase))
    for pattern, subject in with_inline:
        self.assertTrue(re.match(pattern, subject))
|
||||
|
||||
def check_en_US_utf8(self):
    """Assert case-insensitive LOCALE matching under en_US.utf8.

    Sets LC_CTYPE to 'en_US.utf8' (and leaves it set).  The identical
    byte sequence must still match itself, but the single bytes 0xC5 and
    0xE5 must not match each other, whether LOCALE|IGNORECASE is passed
    via the flags argument or written inline as ``(?Li)``.
    """
    locale.setlocale(locale.LC_CTYPE, 'en_US.utf8')
    locale_icase = re.L | re.I

    # (pattern, subject, should_match) triples matched with explicit flags.
    with_flags = (
        (b'\xc5\xe5', b'\xc5\xe5', True),
        (b'\xc5', b'\xe5', False),
        (b'\xe5', b'\xc5', False),
    )
    # The same triples with the flags embedded in the pattern.
    with_inline = (
        (b'(?Li)\xc5\xe5', b'\xc5\xe5', True),
        (b'(?Li)\xc5', b'\xe5', False),
        (b'(?Li)\xe5', b'\xc5', False),
    )

    for pattern, subject, should_match in with_flags:
        found = re.match(pattern, subject, locale_icase)
        if should_match:
            self.assertTrue(found)
        else:
            self.assertIsNone(found)
    for pattern, subject, should_match in with_inline:
        found = re.match(pattern, subject)
        if should_match:
            self.assertTrue(found)
        else:
            self.assertIsNone(found)
|
||||
|
||||
|
||||
def run_re_tests():
|
||||
from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
|
||||
|
|
|
@ -37,6 +37,9 @@ Core and Builtins
|
|||
Library
|
||||
-------
|
||||
|
||||
- Issue #22410: Module level functions in the re module now cache compiled
|
||||
locale-dependent regular expressions taking into account the locale.
|
||||
|
||||
- Issue #8876: distutils now falls back to copying files when hard linking
|
||||
doesn't work. This allows use with special filesystems such as VirtualBox
|
||||
shared folders.
|
||||
|
|
Loading…
Reference in New Issue