mirror of https://github.com/python/cpython
gh-96346: Use double caching for re._compile() (#96347)
This commit is contained in:
parent
eed80458e8
commit
c11b667a1d
|
@@ -229,6 +229,7 @@ def compile(pattern, flags=0):
|
|||
def purge():
|
||||
"Clear the regular expression caches"
|
||||
_cache.clear()
|
||||
_cache2.clear()
|
||||
_compile_repl.cache_clear()
|
||||
|
||||
def template(pattern, flags=0):
|
||||
|
@@ -266,40 +267,64 @@ Match = type(_compiler.compile('', 0).match(''))
|
|||
# --------------------------------------------------------------------
|
||||
# internals
|
||||
|
||||
_cache = {} # ordered!
|
||||
|
||||
# Use the fact that dict keeps the insertion order.
|
||||
# _cache2 uses the simple FIFO policy which has better latency.
|
||||
# _cache uses the LRU policy which has better hit rate.
|
||||
_cache = {} # LRU
|
||||
_cache2 = {} # FIFO
|
||||
_MAXCACHE = 512
|
||||
_MAXCACHE2 = 256
|
||||
assert _MAXCACHE2 < _MAXCACHE
|
||||
|
||||
def _compile(pattern, flags):
|
||||
# internal: compile pattern
|
||||
if isinstance(flags, RegexFlag):
|
||||
flags = flags.value
|
||||
try:
|
||||
return _cache[type(pattern), pattern, flags]
|
||||
return _cache2[type(pattern), pattern, flags]
|
||||
except KeyError:
|
||||
pass
|
||||
if isinstance(pattern, Pattern):
|
||||
if flags:
|
||||
raise ValueError(
|
||||
"cannot process flags argument with a compiled pattern")
|
||||
return pattern
|
||||
if not _compiler.isstring(pattern):
|
||||
raise TypeError("first argument must be string or compiled pattern")
|
||||
if flags & T:
|
||||
import warnings
|
||||
warnings.warn("The re.TEMPLATE/re.T flag is deprecated "
|
||||
"as it is an undocumented flag "
|
||||
"without an obvious purpose. "
|
||||
"Don't use it.",
|
||||
DeprecationWarning)
|
||||
p = _compiler.compile(pattern, flags)
|
||||
if not (flags & DEBUG):
|
||||
|
||||
key = (type(pattern), pattern, flags)
|
||||
# Item in _cache should be moved to the end if found.
|
||||
p = _cache.pop(key, None)
|
||||
if p is None:
|
||||
if isinstance(pattern, Pattern):
|
||||
if flags:
|
||||
raise ValueError(
|
||||
"cannot process flags argument with a compiled pattern")
|
||||
return pattern
|
||||
if not _compiler.isstring(pattern):
|
||||
raise TypeError("first argument must be string or compiled pattern")
|
||||
if flags & T:
|
||||
import warnings
|
||||
warnings.warn("The re.TEMPLATE/re.T flag is deprecated "
|
||||
"as it is an undocumented flag "
|
||||
"without an obvious purpose. "
|
||||
"Don't use it.",
|
||||
DeprecationWarning)
|
||||
p = _compiler.compile(pattern, flags)
|
||||
if flags & DEBUG:
|
||||
return p
|
||||
if len(_cache) >= _MAXCACHE:
|
||||
# Drop the oldest item
|
||||
# Drop the least recently used item.
|
||||
# next(iter(_cache)) is known to have linear amortized time,
|
||||
# but it is used here to avoid a dependency from using OrderedDict.
|
||||
# For the small _MAXCACHE value it doesn't make much of a difference.
|
||||
try:
|
||||
del _cache[next(iter(_cache))]
|
||||
except (StopIteration, RuntimeError, KeyError):
|
||||
pass
|
||||
_cache[type(pattern), pattern, flags] = p
|
||||
# Append to the end.
|
||||
_cache[key] = p
|
||||
|
||||
if len(_cache2) >= _MAXCACHE2:
|
||||
# Drop the oldest item.
|
||||
try:
|
||||
del _cache2[next(iter(_cache2))]
|
||||
except (StopIteration, RuntimeError, KeyError):
|
||||
pass
|
||||
_cache2[key] = p
|
||||
return p
|
||||
|
||||
@functools.lru_cache(_MAXCACHE)
|
||||
|
|
|
@@ -0,0 +1 @@
|
|||
Use double caching for compiled RE patterns.
|
Loading…
Reference in New Issue