diff --git a/Lib/re.py b/Lib/re.py
index 2f1a76eaa52..269eaef8554 100644
--- a/Lib/re.py
+++ b/Lib/re.py
@@ -118,6 +118,7 @@ This module also defines an exception 'error'.
 import sys
 import sre_compile
 import sre_parse
+import functools
 
 # public symbols
 __all__ = [ "match", "search", "sub", "subn", "split", "findall",
@@ -205,9 +206,9 @@ def compile(pattern, flags=0):
     return _compile(pattern, flags)
 
 def purge():
-    "Clear the regular expression cache"
-    _cache.clear()
-    _cache_repl.clear()
+    "Clear the regular expression caches"
+    _compile_typed.clear()
+    _compile_repl.clear()
 
 def template(pattern, flags=0):
     "Compile a template pattern, returning a pattern object"
@@ -289,12 +290,12 @@ def _shrink_cache(cache_dict, max_length, divisor=5):
             # Ignore problems if the cache changed from another thread.
             pass
 
-def _compile(*key):
+def _compile(*args):
+    return _compile_typed(type(args[0]), *args)
+
+@functools.lru_cache(maxsize=_MAXCACHE)
+def _compile_typed(type, *key):
     # internal: compile pattern
-    cachekey = (type(key[0]),) + key
-    p = _cache.get(cachekey)
-    if p is not None:
-        return p
     pattern, flags = key
     if isinstance(pattern, _pattern_type):
         if flags:
@@ -303,23 +304,13 @@ def _compile(*key):
         return pattern
     if not sre_compile.isstring(pattern):
         raise TypeError("first argument must be string or compiled pattern")
-    p = sre_compile.compile(pattern, flags)
-    if len(_cache) >= _MAXCACHE:
-        _shrink_cache(_cache, _MAXCACHE)
-    _cache[cachekey] = p
-    return p
+    return sre_compile.compile(pattern, flags)
 
+@functools.lru_cache(maxsize=_MAXCACHE)
 def _compile_repl(*key):
     # internal: compile replacement pattern
-    p = _cache_repl.get(key)
-    if p is not None:
-        return p
     repl, pattern = key
-    p = sre_parse.parse_template(repl, pattern)
-    if len(_cache_repl) >= _MAXCACHE:
-        _shrink_cache(_cache_repl, _MAXCACHE)
-    _cache_repl[key] = p
-    return p
+    return sre_parse.parse_template(repl, pattern)
 
 def _expand(pattern, match, template):
     # internal: match.expand implementation hook
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index 6b11685e9c8..96a83b88c18 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -875,70 +875,8 @@ def run_re_tests():
                     print('=== Fails on unicode-sensitive match', t)
 
 
-class ReCacheTests(unittest.TestCase):
-    """These tests are specific to the re._shrink_cache implementation."""
-
-    def setUp(self):
-        self._orig_maxcache = re._MAXCACHE
-
-    def tearDown(self):
-        re._MAXCACHE = self._orig_maxcache
-
-    def test_compile_cache_overflow(self):
-        # NOTE: If a profiler or debugger is tracing code and compiling
-        # regular expressions while tracing through this test... expect
-        # the test to fail. This test is not concurrency safe.
-
-        # Explicitly fill the caches.
-        re._MAXCACHE = 20
-        max_cache = re._MAXCACHE
-        unique_chars = tuple(chr(char_num) for char_num in
-                             range(b'a'[0], b'a'[0]+max_cache))
-        re._cache.clear()
-        for char in unique_chars:
-            re._compile(char, 0)
-        self.assertEqual(max_cache, len(re._cache))
-        re._cache_repl.clear()
-        for char in unique_chars:
-            re._compile_repl(char*2, char)
-        self.assertEqual(max_cache, len(re._cache_repl))
-
-        # Overflow both caches and make sure they have extra room left
-        # afterwards as well as having more than a single entry.
-        re._compile('A', 0)
-        self.assertLess(len(re._cache), max_cache)
-        self.assertGreater(len(re._cache), 1)
-        re._compile_repl('A', 'A')
-        self.assertLess(len(re._cache_repl), max_cache)
-        self.assertGreater(len(re._cache_repl), 1)
-
-    def test_shrink_cache_at_limit(self):
-        cache = dict(zip(range(6), range(6)))
-        re._shrink_cache(cache, 6, divisor=3)
-        self.assertEqual(4, len(cache))
-
-    def test_shrink_cache_empty(self):
-        cache = {}
-        re._shrink_cache(cache, 6, divisor=3)
-        # Cache was empty, make sure we didn't raise an exception.
-        self.assertEqual(0, len(cache))
-
-    def test_shrink_cache_overflowing(self):
-        cache = dict(zip(range(6), range(6)))
-        re._shrink_cache(cache, 4, divisor=2)
-        # Cache was larger than the maximum, be sure we shrunk to smaller.
-        self.assertEqual(2, len(cache))
-
-    def test_shrink_cache_underflow(self):
-        cache = dict(zip(range(6), range(6)))
-        # No shrinking to do.
-        re._shrink_cache(cache, 9, divisor=3)
-        self.assertEqual(6, len(cache))
-
-
 def test_main():
     run_unittest(ReTests)
-    run_unittest(ReCacheTests)
     run_re_tests()
 
 if __name__ == "__main__":
diff --git a/Misc/NEWS b/Misc/NEWS
index 00188d4f946..57bcf8ea5f3 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -697,8 +697,8 @@ Library
 
 - The default size of the re module's compiled regular expression cache has
   been increased from 100 to 500 and the cache replacement policy has changed from
-  simply clearing the entire cache on overflow to randomly forgetting 20% of the
-  existing cached compiled regular expressions. This is a performance win for
+  simply clearing the entire cache on overflow to forgetting the least recently
+  used cached compiled regular expressions. This is a performance win for
   applications that use a lot of regular expressions and limits the impact of the
   performance hit anytime the cache is exceeded.
 
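For reference, a minimal sketch (not part of the patch) of the least recently
used eviction policy that functools.lru_cache supplies, which is what lets the
patch delete the manual _MAXCACHE bookkeeping. The compile_pattern() function
below is hypothetical and merely stands in for _compile_typed(); the sketch
uses the current cache_info() introspection API, whereas the patch itself
targets an early lru_cache revision whose decorated functions exposed .clear()
(now spelled cache_clear()):

    import functools

    @functools.lru_cache(maxsize=2)
    def compile_pattern(pattern):
        # Stand-in for sre_compile.compile(); the print fires on misses only.
        print("compiling", pattern)
        return pattern.upper()

    compile_pattern("a")   # miss: compiled and cached
    compile_pattern("b")   # miss: compiled and cached
    compile_pattern("a")   # hit: served from the cache
    compile_pattern("c")   # miss: cache full, evicts "b" (least recently used)
    compile_pattern("a")   # hit: "a" survives because it was used recently
    compile_pattern("b")   # miss: "b" was evicted, so it is recompiled

    print(compile_pattern.cache_info())
    # CacheInfo(hits=2, misses=4, maxsize=2, currsize=2)

Note also that _compile() forwards type(args[0]) as an extra argument to
_compile_typed(), preserving the old cachekey = (type(key[0]),) + key behavior:
the pattern's type participates in the cache key alongside its value and flags.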