Issue #28969: Fixed race condition in C implementation of functools.lru_cache.

KeyError could be raised when cached function with full cache was simultaneously called from differen threads with the same uncached arguments.
2017-01-12 19:42:20 +02:00 · 2017-01-12 19:42:20 +02:00 · 617c7753ce
parent 798ad2742b 42e1ea9a10
commit 617c7753ce
5 changed files with 78 additions and 29 deletions
--- a/Include/dictobject.h
+++ b/Include/dictobject.h
@ -117,6 +117,7 @@ PyAPI_FUNC(int) _PyDict_HasOnlyStringKeys(PyObject *mp);
 Py_ssize_t _PyDict_KeysSize(PyDictKeysObject *keys);
 Py_ssize_t _PyDict_SizeOf(PyDictObject *);
 PyAPI_FUNC(PyObject *) _PyDict_Pop(PyObject *, PyObject *, PyObject *);
+PyObject *_PyDict_Pop_KnownHash(PyObject *, PyObject *, Py_hash_t, PyObject *);
 PyObject *_PyDict_FromKeys(PyObject *, PyObject *, PyObject *);
 #define _PyDict_HasSplitTable(d) ((d)->ma_values != NULL)

--- a/Lib/test/test_functools.py
+++ b/Lib/test/test_functools.py
@ -7,6 +7,7 @@ import pickle
 from random import choice
 import sys
 from test import support
+import time
 import unittest
 import unittest.mock
 from weakref import proxy
@ -1446,6 +1447,20 @@ class TestLRU:
                pause.reset()
                self.assertEqual(f.cache_info(), (0, (i+1)*n, m*n, i+1))

+    @unittest.skipUnless(threading, 'This test requires threading.')
+    def test_lru_cache_threaded3(self):
+        @self.module.lru_cache(maxsize=2)
+        def f(x):
+            time.sleep(.01)
+            return 3 * x
+        def test(i, x):
+            with self.subTest(thread=i):
+                self.assertEqual(f(x), 3 * x, i)
+        threads = [threading.Thread(target=test, args=(i, v))
+                   for i, v in enumerate([1, 2, 2, 3, 2])]
+        with support.start_threads(threads):
+            pass
+
    def test_need_for_rlock(self):
        # This will deadlock on an LRU cache that uses a regular lock

--- a/Misc/NEWS
+++ b/Misc/NEWS
@ -212,6 +212,10 @@ Core and Builtins
 Library
 -------

+- Issue #28969: Fixed race condition in C implementation of functools.lru_cache.
+  KeyError could be raised when cached function with full cache was
+  simultaneously called from differen threads with the same uncached arguments.
+
 - Issue #20804: The unittest.mock.sentinel attributes now preserve their
  identity when they are copied or pickled.

--- a/Modules/_functoolsmodule.c
+++ b/Modules/_functoolsmodule.c
@ -863,42 +863,56 @@ bounded_lru_cache_wrapper(lru_cache_object *self, PyObject *args, PyObject *kwds
    }
    if (self->full && self->root.next != &self->root) {
        /* Use the oldest item to store the new key and result. */
-        PyObject *oldkey, *oldresult;
+        PyObject *oldkey, *oldresult, *popresult;
        /* Extricate the oldest item. */
        link = self->root.next;
        lru_cache_extricate_link(link);
        /* Remove it from the cache.
           The cache dict holds one reference to the link,
           and the linked list holds yet one reference to it. */
-        if (_PyDict_DelItem_KnownHash(self->cache, link->key,
-                                      link->hash) < 0) {
+        popresult = _PyDict_Pop_KnownHash(self->cache,
+                                          link->key, link->hash,
+                                          Py_None);
+        if (popresult == Py_None) {
+            /* Getting here means that this same key was added to the
+               cache while the lock was released.  Since the link
+               update is already done, we need only return the
+               computed result and update the count of misses. */
+            Py_DECREF(popresult);
+            Py_DECREF(link);
+            Py_DECREF(key);
+        }
+        else if (popresult == NULL) {
            lru_cache_append_link(self, link);
            Py_DECREF(key);
            Py_DECREF(result);
            return NULL;
        }
-        /* Keep a reference to the old key and old result to
-           prevent their ref counts from going to zero during the
-           update. That will prevent potentially arbitrary object
-           clean-up code (i.e. __del__) from running while we're
-           still adjusting the links. */
-        oldkey = link->key;
-        oldresult = link->result;
+        else {
+            Py_DECREF(popresult);
+            /* Keep a reference to the old key and old result to
+               prevent their ref counts from going to zero during the
+               update. That will prevent potentially arbitrary object
+               clean-up code (i.e. __del__) from running while we're
+               still adjusting the links. */
+            oldkey = link->key;
+            oldresult = link->result;

-        link->hash = hash;
-        link->key = key;
-        link->result = result;
-        if (_PyDict_SetItem_KnownHash(self->cache, key, (PyObject *)link,
-                                      hash) < 0) {
-            Py_DECREF(link);
+            link->hash = hash;
+            link->key = key;
+            link->result = result;
+            if (_PyDict_SetItem_KnownHash(self->cache, key, (PyObject *)link,
+                                          hash) < 0) {
+                Py_DECREF(link);
+                Py_DECREF(oldkey);
+                Py_DECREF(oldresult);
+                return NULL;
+            }
+            lru_cache_append_link(self, link);
+            Py_INCREF(result); /* for return */
            Py_DECREF(oldkey);
            Py_DECREF(oldresult);
-            return NULL;
        }
-        lru_cache_append_link(self, link);
-        Py_INCREF(result); /* for return */
-        Py_DECREF(oldkey);
-        Py_DECREF(oldresult);
    } else {
        /* Put result in a new link at the front of the queue. */
        link = (lru_list_elem *)PyObject_GC_New(lru_list_elem,
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@ -1817,9 +1817,8 @@ PyDict_Next(PyObject *op, Py_ssize_t *ppos, PyObject **pkey, PyObject **pvalue)

 /* Internal version of dict.pop(). */
 PyObject *
-_PyDict_Pop(PyObject *dict, PyObject *key, PyObject *deflt)
+_PyDict_Pop_KnownHash(PyObject *dict, PyObject *key, Py_hash_t hash, PyObject *deflt)
 {
-    Py_hash_t hash;
    Py_ssize_t ix, hashpos;
    PyObject *old_value, *old_key;
    PyDictKeyEntry *ep;
@ -1836,12 +1835,6 @@ _PyDict_Pop(PyObject *dict, PyObject *key, PyObject *deflt)
        _PyErr_SetKeyError(key);
        return NULL;
    }
-    if (!PyUnicode_CheckExact(key) ||
-        (hash = ((PyASCIIObject *) key)->hash) == -1) {
-        hash = PyObject_Hash(key);
-        if (hash == -1)
-            return NULL;
-    }
    ix = (mp->ma_keys->dk_lookup)(mp, key, hash, &old_value, &hashpos);
    if (ix == DKIX_ERROR)
        return NULL;
@ -1878,6 +1871,28 @@ _PyDict_Pop(PyObject *dict, PyObject *key, PyObject *deflt)
    return old_value;
 }

+PyObject *
+_PyDict_Pop(PyObject *dict, PyObject *key, PyObject *deflt)
+{
+    Py_hash_t hash;
+
+    if (((PyDictObject *)dict)->ma_used == 0) {
+        if (deflt) {
+            Py_INCREF(deflt);
+            return deflt;
+        }
+        _PyErr_SetKeyError(key);
+        return NULL;
+    }
+    if (!PyUnicode_CheckExact(key) ||
+        (hash = ((PyASCIIObject *) key)->hash) == -1) {
+        hash = PyObject_Hash(key);
+        if (hash == -1)
+            return NULL;
+    }
+    return _PyDict_Pop_KnownHash(dict, key, hash, deflt);
+}
+
 /* Internal version of dict.from_keys().  It is subclass-friendly. */
 PyObject *
 _PyDict_FromKeys(PyObject *cls, PyObject *iterable, PyObject *value)