mirror of https://github.com/python/cpython
Issue #9396: Apply functools.lru_cache in place of the
random-flushing cache in the re module.
This commit is contained in:
parent
cca65313c4
commit
4f859ed9c7
32
Lib/re.py
32
Lib/re.py
|
@@ -118,6 +118,7 @@ This module also defines an exception 'error'.
|
||||||
import sys
|
import sys
|
||||||
import sre_compile
|
import sre_compile
|
||||||
import sre_parse
|
import sre_parse
|
||||||
|
import functools
|
||||||
|
|
||||||
# public symbols
|
# public symbols
|
||||||
__all__ = [ "match", "search", "sub", "subn", "split", "findall",
|
__all__ = [ "match", "search", "sub", "subn", "split", "findall",
|
||||||
|
@@ -205,9 +206,9 @@ def compile(pattern, flags=0):
|
||||||
return _compile(pattern, flags)
|
return _compile(pattern, flags)
|
||||||
|
|
||||||
def purge():
|
def purge():
|
||||||
"Clear the regular expression cache"
|
"Clear the regular expression caches"
|
||||||
_cache.clear()
|
_compile_typed.clear()
|
||||||
_cache_repl.clear()
|
_compile_repl.clear()
|
||||||
|
|
||||||
def template(pattern, flags=0):
|
def template(pattern, flags=0):
|
||||||
"Compile a template pattern, returning a pattern object"
|
"Compile a template pattern, returning a pattern object"
|
||||||
|
@@ -289,12 +290,12 @@ def _shrink_cache(cache_dict, max_length, divisor=5):
|
||||||
# Ignore problems if the cache changed from another thread.
|
# Ignore problems if the cache changed from another thread.
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def _compile(*key):
|
def _compile(*args):
|
||||||
|
return _compile_typed(type(args[0]), *args)
|
||||||
|
|
||||||
|
@functools.lru_cache(maxsize=_MAXCACHE)
|
||||||
|
def _compile_typed(type, *key):
|
||||||
# internal: compile pattern
|
# internal: compile pattern
|
||||||
cachekey = (type(key[0]),) + key
|
|
||||||
p = _cache.get(cachekey)
|
|
||||||
if p is not None:
|
|
||||||
return p
|
|
||||||
pattern, flags = key
|
pattern, flags = key
|
||||||
if isinstance(pattern, _pattern_type):
|
if isinstance(pattern, _pattern_type):
|
||||||
if flags:
|
if flags:
|
||||||
|
@@ -303,23 +304,14 @@ def _compile(*key):
|
||||||
return pattern
|
return pattern
|
||||||
if not sre_compile.isstring(pattern):
|
if not sre_compile.isstring(pattern):
|
||||||
raise TypeError("first argument must be string or compiled pattern")
|
raise TypeError("first argument must be string or compiled pattern")
|
||||||
p = sre_compile.compile(pattern, flags)
|
return sre_compile.compile(pattern, flags)
|
||||||
if len(_cache) >= _MAXCACHE:
|
|
||||||
_shrink_cache(_cache, _MAXCACHE)
|
|
||||||
_cache[cachekey] = p
|
|
||||||
return p
|
return p
|
||||||
|
|
||||||
|
@functools.lru_cache(maxsize=_MAXCACHE)
|
||||||
def _compile_repl(*key):
|
def _compile_repl(*key):
|
||||||
# internal: compile replacement pattern
|
# internal: compile replacement pattern
|
||||||
p = _cache_repl.get(key)
|
|
||||||
if p is not None:
|
|
||||||
return p
|
|
||||||
repl, pattern = key
|
repl, pattern = key
|
||||||
p = sre_parse.parse_template(repl, pattern)
|
return sre_parse.parse_template(repl, pattern)
|
||||||
if len(_cache_repl) >= _MAXCACHE:
|
|
||||||
_shrink_cache(_cache_repl, _MAXCACHE)
|
|
||||||
_cache_repl[key] = p
|
|
||||||
return p
|
|
||||||
|
|
||||||
def _expand(pattern, match, template):
|
def _expand(pattern, match, template):
|
||||||
# internal: match.expand implementation hook
|
# internal: match.expand implementation hook
|
||||||
|
|
|
@@ -875,70 +875,8 @@ def run_re_tests():
|
||||||
print('=== Fails on unicode-sensitive match', t)
|
print('=== Fails on unicode-sensitive match', t)
|
||||||
|
|
||||||
|
|
||||||
class ReCacheTests(unittest.TestCase):
|
|
||||||
"""These tests are specific to the re._shrink_cache implementation."""
|
|
||||||
|
|
||||||
def setUp(self):
|
|
||||||
self._orig_maxcache = re._MAXCACHE
|
|
||||||
|
|
||||||
def tearDown(self):
|
|
||||||
re._MAXCACHE = self._orig_maxcache
|
|
||||||
|
|
||||||
def test_compile_cache_overflow(self):
|
|
||||||
# NOTE: If a profiler or debugger is tracing code and compiling
|
|
||||||
# regular expressions while tracing through this test... expect
|
|
||||||
# the test to fail. This test is not concurrency safe.
|
|
||||||
|
|
||||||
# Explicitly fill the caches.
|
|
||||||
re._MAXCACHE = 20
|
|
||||||
max_cache = re._MAXCACHE
|
|
||||||
unique_chars = tuple(chr(char_num) for char_num in
|
|
||||||
range(b'a'[0], b'a'[0]+max_cache))
|
|
||||||
re._cache.clear()
|
|
||||||
for char in unique_chars:
|
|
||||||
re._compile(char, 0)
|
|
||||||
self.assertEqual(max_cache, len(re._cache))
|
|
||||||
re._cache_repl.clear()
|
|
||||||
for char in unique_chars:
|
|
||||||
re._compile_repl(char*2, char)
|
|
||||||
self.assertEqual(max_cache, len(re._cache_repl))
|
|
||||||
|
|
||||||
# Overflow both caches and make sure they have extra room left
|
|
||||||
# afterwards as well as having more than a single entry.
|
|
||||||
re._compile('A', 0)
|
|
||||||
self.assertLess(len(re._cache), max_cache)
|
|
||||||
self.assertGreater(len(re._cache), 1)
|
|
||||||
re._compile_repl('A', 'A')
|
|
||||||
self.assertLess(len(re._cache_repl), max_cache)
|
|
||||||
self.assertGreater(len(re._cache_repl), 1)
|
|
||||||
|
|
||||||
def test_shrink_cache_at_limit(self):
|
|
||||||
cache = dict(zip(range(6), range(6)))
|
|
||||||
re._shrink_cache(cache, 6, divisor=3)
|
|
||||||
self.assertEqual(4, len(cache))
|
|
||||||
|
|
||||||
def test_shrink_cache_empty(self):
|
|
||||||
cache = {}
|
|
||||||
re._shrink_cache(cache, 6, divisor=3)
|
|
||||||
# Cache was empty, make sure we didn't raise an exception.
|
|
||||||
self.assertEqual(0, len(cache))
|
|
||||||
|
|
||||||
def test_shrink_cache_overflowing(self):
|
|
||||||
cache = dict(zip(range(6), range(6)))
|
|
||||||
re._shrink_cache(cache, 4, divisor=2)
|
|
||||||
# Cache was larger than the maximum, be sure we shrunk to smaller.
|
|
||||||
self.assertEqual(2, len(cache))
|
|
||||||
|
|
||||||
def test_shrink_cache_underflow(self):
|
|
||||||
cache = dict(zip(range(6), range(6)))
|
|
||||||
# No shrinking to do.
|
|
||||||
re._shrink_cache(cache, 9, divisor=3)
|
|
||||||
self.assertEqual(6, len(cache))
|
|
||||||
|
|
||||||
|
|
||||||
def test_main():
|
def test_main():
|
||||||
run_unittest(ReTests)
|
run_unittest(ReTests)
|
||||||
run_unittest(ReCacheTests)
|
|
||||||
run_re_tests()
|
run_re_tests()
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
|
@@ -697,8 +697,8 @@ Library
|
||||||
|
|
||||||
- The default size of the re module's compiled regular expression cache has been
|
- The default size of the re module's compiled regular expression cache has been
|
||||||
increased from 100 to 500 and the cache replacement policy has changed from
|
increased from 100 to 500 and the cache replacement policy has changed from
|
||||||
simply clearing the entire cache on overflow to randomly forgetting 20% of the
|
simply clearing the entire cache on overflow to forgetting the least recently
|
||||||
existing cached compiled regular expressions. This is a performance win for
|
used cached compiled regular expressions. This is a performance win for
|
||||||
applications that use a lot of regular expressions and limits the impact of
|
applications that use a lot of regular expressions and limits the impact of
|
||||||
the performance hit anytime the cache is exceeded.
|
the performance hit anytime the cache is exceeded.
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue