diff --git a/Include/dictobject.h b/Include/dictobject.h index 6ef5e0323c8..c4f2e2f5b3b 100644 --- a/Include/dictobject.h +++ b/Include/dictobject.h @@ -115,6 +115,7 @@ PyAPI_FUNC(int) _PyDict_HasOnlyStringKeys(PyObject *mp); Py_ssize_t _PyDict_KeysSize(PyDictKeysObject *keys); Py_ssize_t _PyDict_SizeOf(PyDictObject *); PyAPI_FUNC(PyObject *) _PyDict_Pop(PyObject *, PyObject *, PyObject *); +PyObject *_PyDict_Pop_KnownHash(PyObject *, PyObject *, Py_hash_t, PyObject *); PyObject *_PyDict_FromKeys(PyObject *, PyObject *, PyObject *); #define _PyDict_HasSplitTable(d) ((d)->ma_values != NULL) diff --git a/Lib/test/test_functools.py b/Lib/test/test_functools.py index 27eb57685a5..824549b8034 100644 --- a/Lib/test/test_functools.py +++ b/Lib/test/test_functools.py @@ -7,6 +7,7 @@ import pickle from random import choice import sys from test import support +import time import unittest from weakref import proxy import contextlib @@ -1401,6 +1402,20 @@ class TestLRU: pause.reset() self.assertEqual(f.cache_info(), (0, (i+1)*n, m*n, i+1)) + @unittest.skipUnless(threading, 'This test requires threading.') + def test_lru_cache_threaded3(self): + @self.module.lru_cache(maxsize=2) + def f(x): + time.sleep(.01) + return 3 * x + def test(i, x): + with self.subTest(thread=i): + self.assertEqual(f(x), 3 * x, i) + threads = [threading.Thread(target=test, args=(i, v)) + for i, v in enumerate([1, 2, 2, 3, 2])] + with support.start_threads(threads): + pass + def test_need_for_rlock(self): # This will deadlock on an LRU cache that uses a regular lock diff --git a/Misc/NEWS b/Misc/NEWS index 165e30e0ed7..3b84dd77910 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -44,6 +44,10 @@ Core and Builtins Library ------- +- Issue #28969: Fixed race condition in C implementation of functools.lru_cache. + KeyError could be raised when cached function with full cache was + simultaneously called from differen threads with the same uncached arguments. + - Issue #29142: In urllib.request, suffixes in no_proxy environment variable with leading dots could match related hostnames again (e.g. .b.c matches a.b.c). Patch by Milan Oberkirch. diff --git a/Modules/_functoolsmodule.c b/Modules/_functoolsmodule.c index ca61fcd8b0b..f785a7260e6 100644 --- a/Modules/_functoolsmodule.c +++ b/Modules/_functoolsmodule.c @@ -863,42 +863,56 @@ bounded_lru_cache_wrapper(lru_cache_object *self, PyObject *args, PyObject *kwds } if (self->full && self->root.next != &self->root) { /* Use the oldest item to store the new key and result. */ - PyObject *oldkey, *oldresult; + PyObject *oldkey, *oldresult, *popresult; /* Extricate the oldest item. */ link = self->root.next; lru_cache_extricate_link(link); /* Remove it from the cache. The cache dict holds one reference to the link, and the linked list holds yet one reference to it. */ - if (_PyDict_DelItem_KnownHash(self->cache, link->key, - link->hash) < 0) { + popresult = _PyDict_Pop_KnownHash(self->cache, + link->key, link->hash, + Py_None); + if (popresult == Py_None) { + /* Getting here means that this same key was added to the + cache while the lock was released. Since the link + update is already done, we need only return the + computed result and update the count of misses. */ + Py_DECREF(popresult); + Py_DECREF(link); + Py_DECREF(key); + } + else if (popresult == NULL) { lru_cache_append_link(self, link); Py_DECREF(key); Py_DECREF(result); return NULL; } - /* Keep a reference to the old key and old result to - prevent their ref counts from going to zero during the - update. That will prevent potentially arbitrary object - clean-up code (i.e. __del__) from running while we're - still adjusting the links. */ - oldkey = link->key; - oldresult = link->result; + else { + Py_DECREF(popresult); + /* Keep a reference to the old key and old result to + prevent their ref counts from going to zero during the + update. That will prevent potentially arbitrary object + clean-up code (i.e. __del__) from running while we're + still adjusting the links. */ + oldkey = link->key; + oldresult = link->result; - link->hash = hash; - link->key = key; - link->result = result; - if (_PyDict_SetItem_KnownHash(self->cache, key, (PyObject *)link, - hash) < 0) { - Py_DECREF(link); + link->hash = hash; + link->key = key; + link->result = result; + if (_PyDict_SetItem_KnownHash(self->cache, key, (PyObject *)link, + hash) < 0) { + Py_DECREF(link); + Py_DECREF(oldkey); + Py_DECREF(oldresult); + return NULL; + } + lru_cache_append_link(self, link); + Py_INCREF(result); /* for return */ Py_DECREF(oldkey); Py_DECREF(oldresult); - return NULL; } - lru_cache_append_link(self, link); - Py_INCREF(result); /* for return */ - Py_DECREF(oldkey); - Py_DECREF(oldresult); } else { /* Put result in a new link at the front of the queue. */ link = (lru_list_elem *)PyObject_GC_New(lru_list_elem, diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 64941937e74..a7b403bcecc 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -1829,9 +1829,8 @@ PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue) /* Internal version of dict.pop(). */ PyObject * -_PyDict_Pop(PyObject *dict, PyObject *key, PyObject *deflt) +_PyDict_Pop_KnownHash(PyObject *dict, PyObject *key, Py_hash_t hash, PyObject *deflt) { - Py_hash_t hash; Py_ssize_t ix, hashpos; PyObject *old_value, *old_key; PyDictKeyEntry *ep; @@ -1849,12 +1848,6 @@ _PyDict_Pop(PyObject *dict, PyObject *key, PyObject *deflt) _PyErr_SetKeyError(key); return NULL; } - if (!PyUnicode_CheckExact(key) || - (hash = ((PyASCIIObject *) key)->hash) == -1) { - hash = PyObject_Hash(key); - if (hash == -1) - return NULL; - } ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &value_addr, &hashpos); if (ix == DKIX_ERROR) return NULL; @@ -1892,6 +1885,28 @@ _PyDict_Pop(PyObject *dict, PyObject *key, PyObject *deflt) return old_value; } +PyObject * +_PyDict_Pop(PyObject *dict, PyObject *key, PyObject *deflt) +{ + Py_hash_t hash; + + if (((PyDictObject *)dict)->ma_used == 0) { + if (deflt) { + Py_INCREF(deflt); + return deflt; + } + _PyErr_SetKeyError(key); + return NULL; + } + if (!PyUnicode_CheckExact(key) || + (hash = ((PyASCIIObject *) key)->hash) == -1) { + hash = PyObject_Hash(key); + if (hash == -1) + return NULL; + } + return _PyDict_Pop_KnownHash(dict, key, hash, deflt); +} + /* Internal version of dict.from_keys(). It is subclass-friendly. */ PyObject * _PyDict_FromKeys(PyObject *cls, PyObject *iterable, PyObject *value)