bpo-35780: Fix errors in lru_cache() C code (GH-11623)
This commit is contained in:
parent
adad9e6801
commit
d8080c0119
|
@ -454,7 +454,7 @@ class _HashedSeq(list):
|
|||
|
||||
def _make_key(args, kwds, typed,
|
||||
kwd_mark = (object(),),
|
||||
fasttypes = {int, str, frozenset, type(None)},
|
||||
fasttypes = {int, str},
|
||||
tuple=tuple, type=type, len=len):
|
||||
"""Make a cache key from optionally typed positional and keyword arguments
|
||||
|
||||
|
@ -510,8 +510,11 @@ def lru_cache(maxsize=128, typed=False):
|
|||
|
||||
# Early detection of an erroneous call to @lru_cache without any arguments
|
||||
# resulting in the inner function being passed to maxsize instead of an
|
||||
# integer or None.
|
||||
if maxsize is not None and not isinstance(maxsize, int):
|
||||
# integer or None. Negative maxsize is treated as 0.
|
||||
if isinstance(maxsize, int):
|
||||
if maxsize < 0:
|
||||
maxsize = 0
|
||||
elif maxsize is not None:
|
||||
raise TypeError('Expected maxsize to be an integer or None')
|
||||
|
||||
def decorating_function(user_function):
|
||||
|
@ -578,6 +581,7 @@ def _lru_cache_wrapper(user_function, maxsize, typed, _CacheInfo):
|
|||
link[NEXT] = root
|
||||
hits += 1
|
||||
return result
|
||||
misses += 1
|
||||
result = user_function(*args, **kwds)
|
||||
with lock:
|
||||
if key in cache:
|
||||
|
@ -615,7 +619,6 @@ def _lru_cache_wrapper(user_function, maxsize, typed, _CacheInfo):
|
|||
# Use the cache_len bound method instead of the len() function
|
||||
# which could potentially be wrapped in an lru_cache itself.
|
||||
full = (cache_len() >= maxsize)
|
||||
misses += 1
|
||||
return result
|
||||
|
||||
def cache_info():
|
||||
|
|
|
@ -1233,6 +1233,33 @@ class TestLRU:
|
|||
self.assertEqual(misses, 4)
|
||||
self.assertEqual(currsize, 2)
|
||||
|
||||
def test_lru_bug_35780(self):
|
||||
# C version of the lru_cache was not checking to see if
|
||||
# the user function call has already modified the cache
|
||||
# (this arises in recursive calls and in multi-threading).
|
||||
# This causes the cache to have orphan links not referenced
|
||||
# by the cache dictionary.
|
||||
|
||||
once = True # Modified by f(x) below
|
||||
|
||||
@self.module.lru_cache(maxsize=10)
|
||||
def f(x):
|
||||
nonlocal once
|
||||
rv = f'.{x}.'
|
||||
if x == 20 and once:
|
||||
once = False
|
||||
rv = f(x)
|
||||
return rv
|
||||
|
||||
# Fill the cache
|
||||
for x in range(15):
|
||||
self.assertEqual(f(x), f'.{x}.')
|
||||
self.assertEqual(f.cache_info().currsize, 10)
|
||||
|
||||
# Make a recursive call and make sure the cache remains full
|
||||
self.assertEqual(f(20), '.20.')
|
||||
self.assertEqual(f.cache_info().currsize, 10)
|
||||
|
||||
def test_lru_hash_only_once(self):
|
||||
# To protect against weird reentrancy bugs and to improve
|
||||
# efficiency when faced with slow __hash__ methods, the
|
||||
|
@ -1329,7 +1356,7 @@ class TestLRU:
|
|||
for i in (0, 1):
|
||||
self.assertEqual([eq(n) for n in range(150)], list(range(150)))
|
||||
self.assertEqual(eq.cache_info(),
|
||||
self.module._CacheInfo(hits=0, misses=300, maxsize=-10, currsize=1))
|
||||
self.module._CacheInfo(hits=0, misses=300, maxsize=0, currsize=0))
|
||||
|
||||
def test_lru_with_exceptions(self):
|
||||
# Verify that user_function exceptions get passed through without
|
||||
|
|
|
@ -0,0 +1,11 @@
|
|||
Fix lru_cache() errors arising in recursive, reentrant, or
|
||||
multi-threaded code. These errors could result in orphan links and in
|
||||
the cache being trapped in a state with fewer than the specified maximum
|
||||
number of links. Fix handling of negative maxsize which should have
|
||||
been treated as zero. Fix errors in toggling the "full" status flag.
|
||||
Fix misordering of links when errors are encountered. Sync-up the C
|
||||
code and pure Python code for the space saving path in functions with a
|
||||
single positional argument. In this common case, the space overhead of
|
||||
an lru cache entry is reduced by almost half. Fix counting of cache
|
||||
misses. In error cases, the miss count was out of sync with the actual
|
||||
number of times the underlying user function was called.
|
|
@ -711,16 +711,15 @@ typedef PyObject *(*lru_cache_ternaryfunc)(struct lru_cache_object *, PyObject *
|
|||
|
||||
typedef struct lru_cache_object {
|
||||
lru_list_elem root; /* includes PyObject_HEAD */
|
||||
Py_ssize_t maxsize;
|
||||
PyObject *maxsize_O;
|
||||
PyObject *func;
|
||||
lru_cache_ternaryfunc wrapper;
|
||||
PyObject *cache;
|
||||
PyObject *cache_info_type;
|
||||
Py_ssize_t misses, hits;
|
||||
int typed;
|
||||
PyObject *cache;
|
||||
Py_ssize_t hits;
|
||||
PyObject *func;
|
||||
Py_ssize_t maxsize;
|
||||
Py_ssize_t misses;
|
||||
PyObject *cache_info_type;
|
||||
PyObject *dict;
|
||||
int full;
|
||||
} lru_cache_object;
|
||||
|
||||
static PyTypeObject lru_cache_type;
|
||||
|
@ -733,6 +732,15 @@ lru_cache_make_key(PyObject *args, PyObject *kwds, int typed)
|
|||
|
||||
/* short path, key will match args anyway, which is a tuple */
|
||||
if (!typed && !kwds) {
|
||||
if (PyTuple_GET_SIZE(args) == 1) {
|
||||
key = PyTuple_GET_ITEM(args, 0);
|
||||
if (PyUnicode_CheckExact(key) || PyLong_CheckExact(key)) {
|
||||
/* For common scalar keys, save space by
|
||||
dropping the enclosing args tuple */
|
||||
Py_INCREF(key);
|
||||
return key;
|
||||
}
|
||||
}
|
||||
Py_INCREF(args);
|
||||
return args;
|
||||
}
|
||||
|
@ -835,10 +843,12 @@ infinite_lru_cache_wrapper(lru_cache_object *self, PyObject *args, PyObject *kwd
|
|||
}
|
||||
|
||||
static void
|
||||
lru_cache_extricate_link(lru_list_elem *link)
|
||||
lru_cache_extract_link(lru_list_elem *link)
|
||||
{
|
||||
link->prev->next = link->next;
|
||||
link->next->prev = link->prev;
|
||||
lru_list_elem *link_prev = link->prev;
|
||||
lru_list_elem *link_next = link->next;
|
||||
link_prev->next = link->next;
|
||||
link_next->prev = link->prev;
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -851,11 +861,52 @@ lru_cache_append_link(lru_cache_object *self, lru_list_elem *link)
|
|||
link->next = root;
|
||||
}
|
||||
|
||||
static void
|
||||
lru_cache_prepend_link(lru_cache_object *self, lru_list_elem *link)
|
||||
{
|
||||
lru_list_elem *root = &self->root;
|
||||
lru_list_elem *first = root->next;
|
||||
first->prev = root->next = link;
|
||||
link->prev = root;
|
||||
link->next = first;
|
||||
}
|
||||
|
||||
/* General note on reentrancy:
|
||||
|
||||
There are four dictionary calls in the bounded_lru_cache_wrapper():
|
||||
1) The initial check for a cache match. 2) The post user-function
|
||||
check for a cache match. 3) The deletion of the oldest entry.
|
||||
4) The addition of the newest entry.
|
||||
|
||||
In all four calls, we have a known hash which lets us avoid a call
|
||||
to __hash__(). That leaves only __eq__ as a possible source of a
|
||||
reentrant call.
|
||||
|
||||
The __eq__ method call is always made for a cache hit (dict access #1).
|
||||
Accordingly, we have to make sure not to modify the cache state prior to
|
||||
this call.
|
||||
|
||||
The __eq__ method call is never made for the deletion (dict access #3)
|
||||
because it is an identity match.
|
||||
|
||||
For the other two accesses (#2 and #4), calls to __eq__ only occur
|
||||
when some other entry happens to have an exactly matching hash (all
|
||||
64-bits). Though rare, this can happen, so we have to make sure to
|
||||
either call it at the top of its code path before any cache
|
||||
state modifications (dict access #2) or be prepared to restore
|
||||
invariants at the end of the code path (dict access #4).
|
||||
|
||||
Another possible source of reentrancy is a decref which can trigger
|
||||
arbitrary code execution. To make the code easier to reason about,
|
||||
the decrefs are deferred to the end of each possible code path
|
||||
so that we know the cache is in a consistent state.
|
||||
*/
|
||||
|
||||
static PyObject *
|
||||
bounded_lru_cache_wrapper(lru_cache_object *self, PyObject *args, PyObject *kwds)
|
||||
{
|
||||
lru_list_elem *link;
|
||||
PyObject *key, *result;
|
||||
PyObject *key, *result, *testresult;
|
||||
Py_hash_t hash;
|
||||
|
||||
key = lru_cache_make_key(args, kwds, self->typed);
|
||||
|
@ -867,11 +918,11 @@ bounded_lru_cache_wrapper(lru_cache_object *self, PyObject *args, PyObject *kwds
|
|||
return NULL;
|
||||
}
|
||||
link = (lru_list_elem *)_PyDict_GetItem_KnownHash(self->cache, key, hash);
|
||||
if (link) {
|
||||
lru_cache_extricate_link(link);
|
||||
if (link != NULL) {
|
||||
lru_cache_extract_link(link);
|
||||
lru_cache_append_link(self, link);
|
||||
self->hits++;
|
||||
result = link->result;
|
||||
self->hits++;
|
||||
Py_INCREF(result);
|
||||
Py_DECREF(key);
|
||||
return result;
|
||||
|
@ -880,65 +931,38 @@ bounded_lru_cache_wrapper(lru_cache_object *self, PyObject *args, PyObject *kwds
|
|||
Py_DECREF(key);
|
||||
return NULL;
|
||||
}
|
||||
self->misses++;
|
||||
result = PyObject_Call(self->func, args, kwds);
|
||||
if (!result) {
|
||||
Py_DECREF(key);
|
||||
return NULL;
|
||||
}
|
||||
if (self->full && self->root.next != &self->root) {
|
||||
/* Use the oldest item to store the new key and result. */
|
||||
PyObject *oldkey, *oldresult, *popresult;
|
||||
/* Extricate the oldest item. */
|
||||
link = self->root.next;
|
||||
lru_cache_extricate_link(link);
|
||||
/* Remove it from the cache.
|
||||
The cache dict holds one reference to the link,
|
||||
and the linked list holds yet one reference to it. */
|
||||
popresult = _PyDict_Pop_KnownHash(self->cache,
|
||||
link->key, link->hash,
|
||||
Py_None);
|
||||
if (popresult == Py_None) {
|
||||
/* Getting here means that this same key was added to the
|
||||
cache while the lock was released. Since the link
|
||||
update is already done, we need only return the
|
||||
computed result and update the count of misses. */
|
||||
Py_DECREF(popresult);
|
||||
Py_DECREF(link);
|
||||
Py_DECREF(key);
|
||||
}
|
||||
else if (popresult == NULL) {
|
||||
lru_cache_append_link(self, link);
|
||||
Py_DECREF(key);
|
||||
Py_DECREF(result);
|
||||
return NULL;
|
||||
}
|
||||
else {
|
||||
Py_DECREF(popresult);
|
||||
/* Keep a reference to the old key and old result to
|
||||
prevent their ref counts from going to zero during the
|
||||
update. That will prevent potentially arbitrary object
|
||||
clean-up code (i.e. __del__) from running while we're
|
||||
still adjusting the links. */
|
||||
oldkey = link->key;
|
||||
oldresult = link->result;
|
||||
testresult = _PyDict_GetItem_KnownHash(self->cache, key, hash);
|
||||
if (testresult != NULL) {
|
||||
/* Getting here means that this same key was added to the cache
|
||||
during the PyObject_Call(). Since the link update is already
|
||||
done, we need only return the computed result. */
|
||||
Py_DECREF(key);
|
||||
return result;
|
||||
}
|
||||
if (PyErr_Occurred()) {
|
||||
/* This is an unusual case since this same lookup
|
||||
did not previously trigger an error during lookup.
|
||||
Treat it the same as an error in user function
|
||||
and return with the error set. */
|
||||
Py_DECREF(key);
|
||||
Py_DECREF(result);
|
||||
return NULL;
|
||||
}
|
||||
/* This is the normal case. The new key wasn't found before
|
||||
user function call and it is still not there. So we
|
||||
proceed normally and update the cache with the new result. */
|
||||
|
||||
link->hash = hash;
|
||||
link->key = key;
|
||||
link->result = result;
|
||||
if (_PyDict_SetItem_KnownHash(self->cache, key, (PyObject *)link,
|
||||
hash) < 0) {
|
||||
Py_DECREF(link);
|
||||
Py_DECREF(oldkey);
|
||||
Py_DECREF(oldresult);
|
||||
return NULL;
|
||||
}
|
||||
lru_cache_append_link(self, link);
|
||||
Py_INCREF(result); /* for return */
|
||||
Py_DECREF(oldkey);
|
||||
Py_DECREF(oldresult);
|
||||
}
|
||||
} else {
|
||||
/* Put result in a new link at the front of the queue. */
|
||||
assert(self->maxsize > 0);
|
||||
if (PyDict_GET_SIZE(self->cache) < self->maxsize ||
|
||||
self->root.next == &self->root)
|
||||
{
|
||||
/* Cache is not full, so put the result in a new link */
|
||||
link = (lru_list_elem *)PyObject_New(lru_list_elem,
|
||||
&lru_list_elem_type);
|
||||
if (link == NULL) {
|
||||
|
@ -950,6 +974,11 @@ bounded_lru_cache_wrapper(lru_cache_object *self, PyObject *args, PyObject *kwds
|
|||
link->hash = hash;
|
||||
link->key = key;
|
||||
link->result = result;
|
||||
/* What is really needed here is a SetItem variant with a "no clobber"
|
||||
option. If the __eq__ call triggers a reentrant call that adds
|
||||
this same key, then this setitem call will update the cache dict
|
||||
with this new link, leaving the old link as an orphan (i.e. not
|
||||
having a cache dict entry that refers to it). */
|
||||
if (_PyDict_SetItem_KnownHash(self->cache, key, (PyObject *)link,
|
||||
hash) < 0) {
|
||||
Py_DECREF(link);
|
||||
|
@ -957,9 +986,83 @@ bounded_lru_cache_wrapper(lru_cache_object *self, PyObject *args, PyObject *kwds
|
|||
}
|
||||
lru_cache_append_link(self, link);
|
||||
Py_INCREF(result); /* for return */
|
||||
self->full = (PyDict_GET_SIZE(self->cache) >= self->maxsize);
|
||||
return result;
|
||||
}
|
||||
self->misses++;
|
||||
/* Since the cache is full, we need to evict an old key and add
|
||||
a new key. Rather than free the old link and allocate a new
|
||||
one, we reuse the link for the new key and result and move it
|
||||
to front of the cache to mark it as recently used.
|
||||
|
||||
We try to assure all code paths (including errors) leave all
|
||||
of the links in place. Either the link is successfully
|
||||
updated and moved or it is restored to its old position.
|
||||
However if an unrecoverable error is found, it doesn't
|
||||
make sense to reinsert the link, so we leave it out
|
||||
and the cache will no longer register as full.
|
||||
*/
|
||||
PyObject *oldkey, *oldresult, *popresult;
|
||||
|
||||
/* Extract the oldest item. */
|
||||
assert(self->root.next != &self->root);
|
||||
link = self->root.next;
|
||||
lru_cache_extract_link(link);
|
||||
/* Remove it from the cache.
|
||||
The cache dict holds one reference to the link,
|
||||
and the linked list holds yet one reference to it. */
|
||||
popresult = _PyDict_Pop_KnownHash(self->cache, link->key,
|
||||
link->hash, Py_None);
|
||||
if (popresult == Py_None) {
|
||||
/* Getting here means that the user function call or another
|
||||
thread has already removed the old key from the dictionary.
|
||||
This link is now an orpan. Since we don't want to leave the
|
||||
cache in an inconsistent state, we don't restore the link. */
|
||||
Py_DECREF(popresult);
|
||||
Py_DECREF(link);
|
||||
Py_DECREF(key);
|
||||
return result;
|
||||
}
|
||||
if (popresult == NULL) {
|
||||
/* An error arose while trying to remove the oldest key (the one
|
||||
being evicted) from the cache. We restore the link to its
|
||||
original position as the oldest link. Then we allow the
|
||||
error to propagate upward, treating it the same as an error
|
||||
arising in the user function. */
|
||||
lru_cache_prepend_link(self, link);
|
||||
Py_DECREF(key);
|
||||
Py_DECREF(result);
|
||||
return NULL;
|
||||
}
|
||||
/* Keep a reference to the old key and old result to prevent their
|
||||
ref counts from going to zero during the update. That will
|
||||
prevent potentially arbitrary object clean-up code (i.e. __del__)
|
||||
from running while we're still adjusting the links. */
|
||||
oldkey = link->key;
|
||||
oldresult = link->result;
|
||||
|
||||
link->hash = hash;
|
||||
link->key = key;
|
||||
link->result = result;
|
||||
/* Note: The link is being added to the cache dict without the
|
||||
prev and next fields set to valid values. We have to wait
|
||||
for successful insertion in the cache dict before adding the
|
||||
link to the linked list. Otherwise, the potentially reentrant
|
||||
__eq__ call could cause the then-orphan link to be visited. */
|
||||
if (_PyDict_SetItem_KnownHash(self->cache, key, (PyObject *)link,
|
||||
hash) < 0) {
|
||||
/* Somehow the cache dict update failed. We no longer can
|
||||
restore the old link. Let the error propagate upward and
|
||||
leave the cache short one link. */
|
||||
Py_DECREF(popresult);
|
||||
Py_DECREF(link);
|
||||
Py_DECREF(oldkey);
|
||||
Py_DECREF(oldresult);
|
||||
return NULL;
|
||||
}
|
||||
lru_cache_append_link(self, link);
|
||||
Py_INCREF(result); /* for return */
|
||||
Py_DECREF(popresult);
|
||||
Py_DECREF(oldkey);
|
||||
Py_DECREF(oldresult);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -995,6 +1098,9 @@ lru_cache_new(PyTypeObject *type, PyObject *args, PyObject *kw)
|
|||
maxsize = PyNumber_AsSsize_t(maxsize_O, PyExc_OverflowError);
|
||||
if (maxsize == -1 && PyErr_Occurred())
|
||||
return NULL;
|
||||
if (maxsize < 0) {
|
||||
maxsize = 0;
|
||||
}
|
||||
if (maxsize == 0)
|
||||
wrapper = uncached_lru_cache_wrapper;
|
||||
else
|
||||
|
@ -1013,20 +1119,17 @@ lru_cache_new(PyTypeObject *type, PyObject *args, PyObject *kw)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
obj->cache = cachedict;
|
||||
obj->root.prev = &obj->root;
|
||||
obj->root.next = &obj->root;
|
||||
obj->maxsize = maxsize;
|
||||
Py_INCREF(maxsize_O);
|
||||
obj->maxsize_O = maxsize_O;
|
||||
obj->wrapper = wrapper;
|
||||
obj->typed = typed;
|
||||
obj->cache = cachedict;
|
||||
Py_INCREF(func);
|
||||
obj->func = func;
|
||||
obj->wrapper = wrapper;
|
||||
obj->misses = obj->hits = 0;
|
||||
obj->typed = typed;
|
||||
obj->maxsize = maxsize;
|
||||
Py_INCREF(cache_info_type);
|
||||
obj->cache_info_type = cache_info_type;
|
||||
|
||||
return (PyObject *)obj;
|
||||
}
|
||||
|
||||
|
@ -1060,11 +1163,10 @@ lru_cache_dealloc(lru_cache_object *obj)
|
|||
PyObject_GC_UnTrack(obj);
|
||||
|
||||
list = lru_cache_unlink_list(obj);
|
||||
Py_XDECREF(obj->maxsize_O);
|
||||
Py_XDECREF(obj->func);
|
||||
Py_XDECREF(obj->cache);
|
||||
Py_XDECREF(obj->dict);
|
||||
Py_XDECREF(obj->func);
|
||||
Py_XDECREF(obj->cache_info_type);
|
||||
Py_XDECREF(obj->dict);
|
||||
lru_cache_clear_list(list);
|
||||
Py_TYPE(obj)->tp_free(obj);
|
||||
}
|
||||
|
@ -1088,8 +1190,13 @@ lru_cache_descr_get(PyObject *self, PyObject *obj, PyObject *type)
|
|||
static PyObject *
|
||||
lru_cache_cache_info(lru_cache_object *self, PyObject *unused)
|
||||
{
|
||||
return PyObject_CallFunction(self->cache_info_type, "nnOn",
|
||||
self->hits, self->misses, self->maxsize_O,
|
||||
if (self->maxsize == -1) {
|
||||
return PyObject_CallFunction(self->cache_info_type, "nnOn",
|
||||
self->hits, self->misses, Py_None,
|
||||
PyDict_GET_SIZE(self->cache));
|
||||
}
|
||||
return PyObject_CallFunction(self->cache_info_type, "nnnn",
|
||||
self->hits, self->misses, self->maxsize,
|
||||
PyDict_GET_SIZE(self->cache));
|
||||
}
|
||||
|
||||
|
@ -1098,7 +1205,6 @@ lru_cache_cache_clear(lru_cache_object *self, PyObject *unused)
|
|||
{
|
||||
lru_list_elem *list = lru_cache_unlink_list(self);
|
||||
self->hits = self->misses = 0;
|
||||
self->full = 0;
|
||||
PyDict_Clear(self->cache);
|
||||
lru_cache_clear_list(list);
|
||||
Py_RETURN_NONE;
|
||||
|
@ -1134,7 +1240,6 @@ lru_cache_tp_traverse(lru_cache_object *self, visitproc visit, void *arg)
|
|||
Py_VISIT(link->result);
|
||||
link = next;
|
||||
}
|
||||
Py_VISIT(self->maxsize_O);
|
||||
Py_VISIT(self->func);
|
||||
Py_VISIT(self->cache);
|
||||
Py_VISIT(self->cache_info_type);
|
||||
|
@ -1146,7 +1251,6 @@ static int
|
|||
lru_cache_tp_clear(lru_cache_object *self)
|
||||
{
|
||||
lru_list_elem *list = lru_cache_unlink_list(self);
|
||||
Py_CLEAR(self->maxsize_O);
|
||||
Py_CLEAR(self->func);
|
||||
Py_CLEAR(self->cache);
|
||||
Py_CLEAR(self->cache_info_type);
|
||||
|
|
Loading…
Reference in New Issue