Issue 10586: change the new functools.lru_cache implementation to expose the maximum and current cache sizes through the public statistics API. This API is now a single function that returns a named tuple.

commit 234515afe5
Author: Nick Coghlan
Date:   2010-11-30 06:19:46 +00:00
Parent: ff27ee0b40

5 changed files with 69 additions and 19 deletions
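For reference, a short usage sketch of the API described above (illustrative only, not part of the commit; the field names maxsize, size, hits and misses follow the named tuple introduced by this change):

    from functools import lru_cache

    @lru_cache(maxsize=32)
    def fib(n):
        return n if n < 2 else fib(n - 1) + fib(n - 2)

    fib(10)
    info = fib.cache_info()        # e.g. CacheInfo(maxsize=32, size=11, hits=8, misses=11)
    print(info.hits, info.misses)  # statistics are read through named fields
    fib.cache_clear()              # resets the cache and both counters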

Doc/library/functools.rst

@@ -52,10 +52,16 @@ The :mod:`functools` module defines the following functions:
    results, the positional and keyword arguments to the function must be
    hashable.
 
-   The wrapped function is instrumented with two attributes, :attr:`cache_hits`
-   and :attr:`cache_misses` which count the number of successful or unsuccessful
-   cache lookups.  These statistics are helpful for tuning the *maxsize*
-   parameter and for measuring the cache's effectiveness.
+   The wrapped function is instrumented with a :attr:`cache_info` attribute that
+   can be called to retrieve a named tuple with the following fields:
+
+   - :attr:`maxsize`: maximum cache size (as set by the *maxsize* parameter)
+   - :attr:`size`: current number of entries in the cache
+   - :attr:`hits`: number of successful cache lookups
+   - :attr:`misses`: number of unsuccessful cache lookups.
+
+   These statistics are helpful for tuning the *maxsize* parameter and for measuring
+   the effectiveness of the cache.
 
    The wrapped function also has a :attr:`cache_clear` attribute which can be
    called (with no arguments) to clear the cache.
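As a rough illustration of how those fields could feed back into choosing *maxsize* (the helper name and the 50% threshold below are invented for this example, not part of the commit):

    def cache_report(func):
        """Summarize the effectiveness of an lru_cache-wrapped function."""
        maxsize, size, hits, misses = func.cache_info()
        total = hits + misses
        hit_rate = hits / total if total else 0.0
        print("maxsize=%d size=%d hit_rate=%.1f%%" % (maxsize, size, 100 * hit_rate))
        if size == maxsize and hit_rate < 0.5:
            print("cache is full but mostly missing; consider a larger maxsize")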

Doc/whatsnew/3.2.rst

@@ -332,6 +332,8 @@ New, Improved, and Deprecated Modules
         c.execute('SELECT phonenumber FROM phonelist WHERE name=?', (name,))
         return c.fetchone()[0]
 
+XXX: update for Issue 10586 changes to cache statistics API
+
 To help with choosing an effective cache size, the wrapped function is
 instrumented with two attributes *cache_hits* and *cache_misses*:
 

Lib/functools.py

@@ -12,7 +12,7 @@ __all__ = ['update_wrapper', 'wraps', 'WRAPPER_ASSIGNMENTS', 'WRAPPER_UPDATES',
            'total_ordering', 'cmp_to_key', 'lru_cache', 'reduce', 'partial']
 
 from _functools import partial, reduce
-from collections import OrderedDict
+from collections import OrderedDict, namedtuple
 try:
     from _thread import allocate_lock as Lock
 except:
@@ -114,12 +114,15 @@ def cmp_to_key(mycmp):
             raise TypeError('hash not implemented')
     return K
 
+_CacheInfo = namedtuple("CacheInfo", "maxsize, size, hits, misses")
+
 def lru_cache(maxsize=100):
     """Least-recently-used cache decorator.
 
     Arguments to the cached function must be hashable.
 
-    Performance statistics stored in f.cache_hits and f.cache_misses.
+    Significant statistics (maxsize, size, hits, misses) are
+    available through the f.cache_info() named tuple.
 
     Clear the cache and statistics using f.cache_clear().
     The underlying function is stored in f.__wrapped__.
@@ -127,7 +130,7 @@ def lru_cache(maxsize=100):
 
     """
     # Users should only access the lru_cache through its public API:
-    #       cache_hits, cache_misses, cache_clear(), and __wrapped__
+    #       cache_info, cache_clear, and f.__wrapped__
     # The internals of the lru_cache are encapsulated for thread safety and
     # to allow the implementation to change (including a possible C version).
 
@@ -137,11 +140,13 @@
         cache = OrderedDict()           # ordered least recent to most recent
         cache_popitem = cache.popitem
         cache_renew = cache.move_to_end
+        hits = misses = 0
         kwd_mark = object()             # separate positional and keyword args
         lock = Lock()
 
         @wraps(user_function)
         def wrapper(*args, **kwds):
+            nonlocal hits, misses
             key = args
             if kwds:
                 key += (kwd_mark,) + tuple(sorted(kwds.items()))
@@ -149,23 +154,29 @@
                 with lock:
                     result = cache[key]
                     cache_renew(key)        # record recent use of this key
-                    wrapper.cache_hits += 1
+                    hits += 1
             except KeyError:
                 result = user_function(*args, **kwds)
                 with lock:
                     cache[key] = result     # record recent use of this key
-                    wrapper.cache_misses += 1
+                    misses += 1
                     if len(cache) > maxsize:
                         cache_popitem(0)    # purge least recently used cache entry
             return result
 
+        def cache_info():
+            """Report significant cache statistics"""
+            with lock:
+                return _CacheInfo(maxsize, len(cache), hits, misses)
+
         def cache_clear():
             """Clear the cache and cache statistics"""
+            nonlocal hits, misses
             with lock:
                 cache.clear()
-                wrapper.cache_hits = wrapper.cache_misses = 0
+                hits = misses = 0
 
-        wrapper.cache_hits = wrapper.cache_misses = 0
+        wrapper.cache_info = cache_info
         wrapper.cache_clear = cache_clear
         return wrapper
 
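A side effect of moving the counters into the closure and guarding them with the lock is that cache_info() returns a self-consistent snapshot even under concurrent callers. A hedged sketch exercising that property (thread count and workload are arbitrary; field names follow this commit's CacheInfo):

    import threading
    from functools import lru_cache

    @lru_cache(maxsize=8)
    def square(x):
        return x * x

    def hammer():
        for i in range(1000):
            square(i % 16)

    threads = [threading.Thread(target=hammer) for _ in range(4)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()

    info = square.cache_info()                # named tuple; fields per this commit
    assert info.hits + info.misses == 4000    # every call counted exactly once
    assert info.size <= info.maxsize          # eviction keeps the cache bounded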

Lib/test/test_functools.py

@@ -501,6 +501,11 @@ class TestLRU(unittest.TestCase):
         def orig(x, y):
             return 3*x+y
         f = functools.lru_cache(maxsize=20)(orig)
+        maxsize, currsize, hits, misses = f.cache_info()
+        self.assertEqual(maxsize, 20)
+        self.assertEqual(currsize, 0)
+        self.assertEqual(hits, 0)
+        self.assertEqual(misses, 0)
 
         domain = range(5)
         for i in range(1000):
@@ -508,21 +513,29 @@
             actual = f(x, y)
             expected = orig(x, y)
             self.assertEqual(actual, expected)
-        self.assertTrue(f.cache_hits > f.cache_misses)
-        self.assertEqual(f.cache_hits + f.cache_misses, 1000)
+        maxsize, currsize, hits, misses = f.cache_info()
+        self.assertTrue(hits > misses)
+        self.assertEqual(hits + misses, 1000)
+        self.assertEqual(currsize, 20)
 
         f.cache_clear()   # test clearing
-        self.assertEqual(f.cache_hits, 0)
-        self.assertEqual(f.cache_misses, 0)
+        maxsize, currsize, hits, misses = f.cache_info()
+        self.assertEqual(hits, 0)
+        self.assertEqual(misses, 0)
+        self.assertEqual(currsize, 0)
         f(x, y)
-        self.assertEqual(f.cache_hits, 0)
-        self.assertEqual(f.cache_misses, 1)
+        maxsize, currsize, hits, misses = f.cache_info()
+        self.assertEqual(hits, 0)
+        self.assertEqual(misses, 1)
+        self.assertEqual(currsize, 1)
 
         # Test bypassing the cache
         self.assertIs(f.__wrapped__, orig)
         f.__wrapped__(x, y)
-        self.assertEqual(f.cache_hits, 0)
-        self.assertEqual(f.cache_misses, 1)
+        maxsize, currsize, hits, misses = f.cache_info()
+        self.assertEqual(hits, 0)
+        self.assertEqual(misses, 1)
+        self.assertEqual(currsize, 1)
 
         # test size zero (which means "never-cache")
         @functools.lru_cache(0)
@@ -530,10 +543,15 @@
             nonlocal f_cnt
             f_cnt += 1
             return 20
+        self.assertEqual(f.cache_info().maxsize, 0)
         f_cnt = 0
         for i in range(5):
             self.assertEqual(f(), 20)
         self.assertEqual(f_cnt, 5)
+        maxsize, currsize, hits, misses = f.cache_info()
+        self.assertEqual(hits, 0)
+        self.assertEqual(misses, 5)
+        self.assertEqual(currsize, 0)
 
         # test size one
         @functools.lru_cache(1)
@@ -541,10 +559,15 @@
             nonlocal f_cnt
             f_cnt += 1
             return 20
+        self.assertEqual(f.cache_info().maxsize, 1)
         f_cnt = 0
         for i in range(5):
             self.assertEqual(f(), 20)
         self.assertEqual(f_cnt, 1)
+        maxsize, currsize, hits, misses = f.cache_info()
+        self.assertEqual(hits, 4)
+        self.assertEqual(misses, 1)
+        self.assertEqual(currsize, 1)
 
         # test size two
         @functools.lru_cache(2)
@@ -552,11 +575,16 @@
             nonlocal f_cnt
             f_cnt += 1
             return x*10
+        self.assertEqual(f.cache_info().maxsize, 2)
         f_cnt = 0
         for x in 7, 9, 7, 9, 7, 9, 8, 8, 8, 9, 9, 9, 8, 8, 8, 7:
             # * * * *
             self.assertEqual(f(x), x*10)
         self.assertEqual(f_cnt, 4)
+        maxsize, currsize, hits, misses = f.cache_info()
+        self.assertEqual(hits, 12)
+        self.assertEqual(misses, 4)
+        self.assertEqual(currsize, 2)
 
 def test_main(verbose=None):
     test_classes = (

Misc/NEWS

@@ -43,6 +43,9 @@ Core and Builtins
 Library
 -------
 
+- Issue #10586: The statistics API for the new functools.lru_cache has
+  been changed to a single cache_info() method returning a named tuple.
+
 - Issue #10323: itertools.islice() now consumes the minimum number of
   inputs before stopping.  Formerly, the final state of the underlying
   iterator was undefined.