Issue #22410: Module level functions in the re module now cache compiled
locale-dependent regular expressions taking into account the locale.
This commit is contained in:
Serhiy Storchaka 2014-10-31 00:56:45 +02:00
commit 7cc0a1f7cb
3 changed files with 49 additions and 2 deletions

View File

@ -122,6 +122,7 @@ This module also defines an exception 'error'.
import sys import sys
import sre_compile import sre_compile
import sre_parse import sre_parse
import _locale
# public symbols # public symbols
__all__ = [ __all__ = [
@ -274,7 +275,9 @@ _MAXCACHE = 512
def _compile(pattern, flags): def _compile(pattern, flags):
# internal: compile pattern # internal: compile pattern
try: try:
return _cache[type(pattern), pattern, flags] p, loc = _cache[type(pattern), pattern, flags]
if loc is None or loc == _locale.setlocale(_locale.LC_CTYPE):
return p
except KeyError: except KeyError:
pass pass
if isinstance(pattern, _pattern_type): if isinstance(pattern, _pattern_type):
@ -288,7 +291,11 @@ def _compile(pattern, flags):
if not (flags & DEBUG): if not (flags & DEBUG):
if len(_cache) >= _MAXCACHE: if len(_cache) >= _MAXCACHE:
_cache.clear() _cache.clear()
_cache[type(pattern), pattern, flags] = p if p.flags & LOCALE:
loc = _locale.setlocale(_locale.LC_CTYPE)
else:
loc = None
_cache[type(pattern), pattern, flags] = p, loc
return p return p
def _compile_repl(repl, pattern): def _compile_repl(repl, pattern):

View File

@ -1,6 +1,7 @@
from test.support import verbose, run_unittest, gc_collect, bigmemtest, _2G, \ from test.support import verbose, run_unittest, gc_collect, bigmemtest, _2G, \
cpython_only, captured_stdout cpython_only, captured_stdout
import io import io
import locale
import re import re
from re import Scanner from re import Scanner
import sre_compile import sre_compile
@ -1276,6 +1277,42 @@ subpattern None
# with ignore case. # with ignore case.
self.assertEqual(re.fullmatch('[a-c]+', 'ABC', re.I).span(), (0, 3)) self.assertEqual(re.fullmatch('[a-c]+', 'ABC', re.I).span(), (0, 3))
def test_locale_caching(self):
    # Issue #22410
    # Remember the current LC_CTYPE setting and put it back once the
    # test is over, whatever the outcome.
    saved_ctype = locale.setlocale(locale.LC_CTYPE)
    self.addCleanup(locale.setlocale, locale.LC_CTYPE, saved_ctype)

    # Both locales must be settable on this system; otherwise skip.
    for name in ('en_US.iso88591', 'en_US.utf8'):
        try:
            locale.setlocale(locale.LC_CTYPE, name)
        except locale.Error:
            # Unsupported locale on this system
            self.skipTest('test needs %s locale' % name)

    # Run the two checks in both orders, each time starting from an
    # empty re cache, so that either locale can be the one whose
    # compiled patterns get cached first.
    orderings = (
        (self.check_en_US_iso88591, self.check_en_US_utf8),
        (self.check_en_US_utf8, self.check_en_US_iso88591),
    )
    for first_check, second_check in orderings:
        re.purge()
        first_check()
        second_check()
def check_en_US_iso88591(self):
    # Switch LC_CTYPE to the ISO-8859-1 locale, then expect every
    # locale-aware, case-insensitive match below to succeed, including
    # b'\xc5' against b'\xe5' in either direction.
    locale.setlocale(locale.LC_CTYPE, 'en_US.iso88591')
    locale_ignorecase = re.L | re.I

    # Flags passed as an argument.
    flag_cases = (
        (b'\xc5\xe5', b'\xc5\xe5'),
        (b'\xc5', b'\xe5'),
        (b'\xe5', b'\xc5'),
    )
    for pattern, data in flag_cases:
        self.assertTrue(re.match(pattern, data, locale_ignorecase))

    # Same flags given inline in the pattern.
    inline_cases = (
        (b'(?Li)\xc5\xe5', b'\xc5\xe5'),
        (b'(?Li)\xc5', b'\xe5'),
        (b'(?Li)\xe5', b'\xc5'),
    )
    for pattern, data in inline_cases:
        self.assertTrue(re.match(pattern, data))
def check_en_US_utf8(self):
    # Switch LC_CTYPE to the UTF-8 locale.  The identical two-byte
    # pattern must still match itself, but b'\xc5' and b'\xe5' are
    # expected NOT to match each other case-insensitively here.
    locale.setlocale(locale.LC_CTYPE, 'en_US.utf8')
    locale_ignorecase = re.L | re.I

    # Flags passed as an argument.
    self.assertTrue(re.match(b'\xc5\xe5', b'\xc5\xe5', locale_ignorecase))
    for pattern, data in ((b'\xc5', b'\xe5'), (b'\xe5', b'\xc5')):
        self.assertIsNone(re.match(pattern, data, locale_ignorecase))

    # Same flags given inline in the pattern.
    self.assertTrue(re.match(b'(?Li)\xc5\xe5', b'\xc5\xe5'))
    for pattern, data in ((b'(?Li)\xc5', b'\xe5'), (b'(?Li)\xe5', b'\xc5')):
        self.assertIsNone(re.match(pattern, data))
class PatternReprTests(unittest.TestCase): class PatternReprTests(unittest.TestCase):
def check(self, pattern, expected): def check(self, pattern, expected):

View File

@ -180,6 +180,9 @@ Core and Builtins
Library Library
------- -------
- Issue #22410: Module level functions in the re module now cache compiled
locale-dependent regular expressions taking into account the locale.
- Issue #22759: Query methods on pathlib.Path() (exists(), is_dir(), etc.) - Issue #22759: Query methods on pathlib.Path() (exists(), is_dir(), etc.)
now return False when the underlying stat call raises NotADirectoryError. now return False when the underlying stat call raises NotADirectoryError.