mirror of https://github.com/python/cpython
gh-96346: Use double caching for re._compile() (#96347)
This commit is contained in:
parent
eed80458e8
commit
c11b667a1d
|
@ -229,6 +229,7 @@ def compile(pattern, flags=0):
|
||||||
def purge():
|
def purge():
|
||||||
"Clear the regular expression caches"
|
"Clear the regular expression caches"
|
||||||
_cache.clear()
|
_cache.clear()
|
||||||
|
_cache2.clear()
|
||||||
_compile_repl.cache_clear()
|
_compile_repl.cache_clear()
|
||||||
|
|
||||||
def template(pattern, flags=0):
|
def template(pattern, flags=0):
|
||||||
|
@ -266,40 +267,64 @@ Match = type(_compiler.compile('', 0).match(''))
|
||||||
# --------------------------------------------------------------------
|
# --------------------------------------------------------------------
|
||||||
# internals
|
# internals
|
||||||
|
|
||||||
_cache = {} # ordered!
|
# Use the fact that dict keeps the insertion order.
|
||||||
|
# _cache2 uses the simple FIFO policy which has better latency.
|
||||||
|
# _cache uses the LRU policy which has better hit rate.
|
||||||
|
_cache = {} # LRU
|
||||||
|
_cache2 = {} # FIFO
|
||||||
_MAXCACHE = 512
|
_MAXCACHE = 512
|
||||||
|
_MAXCACHE2 = 256
|
||||||
|
assert _MAXCACHE2 < _MAXCACHE
|
||||||
|
|
||||||
def _compile(pattern, flags):
|
def _compile(pattern, flags):
|
||||||
# internal: compile pattern
|
# internal: compile pattern
|
||||||
if isinstance(flags, RegexFlag):
|
if isinstance(flags, RegexFlag):
|
||||||
flags = flags.value
|
flags = flags.value
|
||||||
try:
|
try:
|
||||||
return _cache[type(pattern), pattern, flags]
|
return _cache2[type(pattern), pattern, flags]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
pass
|
pass
|
||||||
if isinstance(pattern, Pattern):
|
|
||||||
if flags:
|
key = (type(pattern), pattern, flags)
|
||||||
raise ValueError(
|
# Item in _cache should be moved to the end if found.
|
||||||
"cannot process flags argument with a compiled pattern")
|
p = _cache.pop(key, None)
|
||||||
return pattern
|
if p is None:
|
||||||
if not _compiler.isstring(pattern):
|
if isinstance(pattern, Pattern):
|
||||||
raise TypeError("first argument must be string or compiled pattern")
|
if flags:
|
||||||
if flags & T:
|
raise ValueError(
|
||||||
import warnings
|
"cannot process flags argument with a compiled pattern")
|
||||||
warnings.warn("The re.TEMPLATE/re.T flag is deprecated "
|
return pattern
|
||||||
"as it is an undocumented flag "
|
if not _compiler.isstring(pattern):
|
||||||
"without an obvious purpose. "
|
raise TypeError("first argument must be string or compiled pattern")
|
||||||
"Don't use it.",
|
if flags & T:
|
||||||
DeprecationWarning)
|
import warnings
|
||||||
p = _compiler.compile(pattern, flags)
|
warnings.warn("The re.TEMPLATE/re.T flag is deprecated "
|
||||||
if not (flags & DEBUG):
|
"as it is an undocumented flag "
|
||||||
|
"without an obvious purpose. "
|
||||||
|
"Don't use it.",
|
||||||
|
DeprecationWarning)
|
||||||
|
p = _compiler.compile(pattern, flags)
|
||||||
|
if flags & DEBUG:
|
||||||
|
return p
|
||||||
if len(_cache) >= _MAXCACHE:
|
if len(_cache) >= _MAXCACHE:
|
||||||
# Drop the oldest item
|
# Drop the least recently used item.
|
||||||
|
# next(iter(_cache)) is known to have linear amortized time,
|
||||||
|
# but it is used here to avoid a dependency on OrderedDict.
|
||||||
|
# For the small _MAXCACHE value it doesn't make much of a difference.
|
||||||
try:
|
try:
|
||||||
del _cache[next(iter(_cache))]
|
del _cache[next(iter(_cache))]
|
||||||
except (StopIteration, RuntimeError, KeyError):
|
except (StopIteration, RuntimeError, KeyError):
|
||||||
pass
|
pass
|
||||||
_cache[type(pattern), pattern, flags] = p
|
# Append to the end.
|
||||||
|
_cache[key] = p
|
||||||
|
|
||||||
|
if len(_cache2) >= _MAXCACHE2:
|
||||||
|
# Drop the oldest item.
|
||||||
|
try:
|
||||||
|
del _cache2[next(iter(_cache2))]
|
||||||
|
except (StopIteration, RuntimeError, KeyError):
|
||||||
|
pass
|
||||||
|
_cache2[key] = p
|
||||||
return p
|
return p
|
||||||
|
|
||||||
@functools.lru_cache(_MAXCACHE)
|
@functools.lru_cache(_MAXCACHE)
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
Use double caching for compiled RE patterns.
|
Loading…
Reference in New Issue