bpo-31179: Make dict.copy() up to 5.5 times faster. (#3067)
This commit is contained in:
parent
a4b1bb4801
commit
b0a7a037b8
|
@ -272,10 +272,56 @@ class DictTest(unittest.TestCase):
|
||||||
|
|
||||||
def test_copy(self):
|
def test_copy(self):
|
||||||
d = {1: 1, 2: 2, 3: 3}
|
d = {1: 1, 2: 2, 3: 3}
|
||||||
|
self.assertIsNot(d.copy(), d)
|
||||||
|
self.assertEqual(d.copy(), d)
|
||||||
self.assertEqual(d.copy(), {1: 1, 2: 2, 3: 3})
|
self.assertEqual(d.copy(), {1: 1, 2: 2, 3: 3})
|
||||||
|
|
||||||
|
copy = d.copy()
|
||||||
|
d[4] = 4
|
||||||
|
self.assertNotEqual(copy, d)
|
||||||
|
|
||||||
self.assertEqual({}.copy(), {})
|
self.assertEqual({}.copy(), {})
|
||||||
self.assertRaises(TypeError, d.copy, None)
|
self.assertRaises(TypeError, d.copy, None)
|
||||||
|
|
||||||
|
def test_copy_fuzz(self):
|
||||||
|
for dict_size in [10, 100, 1000, 10000, 100000]:
|
||||||
|
dict_size = random.randrange(
|
||||||
|
dict_size // 2, dict_size + dict_size // 2)
|
||||||
|
with self.subTest(dict_size=dict_size):
|
||||||
|
d = {}
|
||||||
|
for i in range(dict_size):
|
||||||
|
d[i] = i
|
||||||
|
|
||||||
|
d2 = d.copy()
|
||||||
|
self.assertIsNot(d2, d)
|
||||||
|
self.assertEqual(d, d2)
|
||||||
|
d2['key'] = 'value'
|
||||||
|
self.assertNotEqual(d, d2)
|
||||||
|
self.assertEqual(len(d2), len(d) + 1)
|
||||||
|
|
||||||
|
def test_copy_maintains_tracking(self):
|
||||||
|
class A:
|
||||||
|
pass
|
||||||
|
|
||||||
|
key = A()
|
||||||
|
|
||||||
|
for d in ({}, {'a': 1}, {key: 'val'}):
|
||||||
|
d2 = d.copy()
|
||||||
|
self.assertEqual(gc.is_tracked(d), gc.is_tracked(d2))
|
||||||
|
|
||||||
|
def test_copy_noncompact(self):
|
||||||
|
# Dicts don't compact themselves on del/pop operations.
|
||||||
|
# Copy will use a slow merging strategy that produces
|
||||||
|
# a compacted copy when roughly 33% of dict is a non-used
|
||||||
|
# keys-space (to optimize memory footprint).
|
||||||
|
# In this test we want to hit the slow/compacting
|
||||||
|
# branch of dict.copy() and make sure it works OK.
|
||||||
|
d = {k: k for k in range(1000)}
|
||||||
|
for k in range(950):
|
||||||
|
del d[k]
|
||||||
|
d2 = d.copy()
|
||||||
|
self.assertEqual(d2, d)
|
||||||
|
|
||||||
def test_get(self):
|
def test_get(self):
|
||||||
d = {}
|
d = {}
|
||||||
self.assertIs(d.get('c'), None)
|
self.assertIs(d.get('c'), None)
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
Make dict.copy() up to 5.5 times faster.
|
|
@ -615,6 +615,52 @@ new_dict_with_shared_keys(PyDictKeysObject *keys)
|
||||||
return new_dict(keys, values);
|
return new_dict(keys, values);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
clone_combined_dict(PyDictObject *orig)
|
||||||
|
{
|
||||||
|
assert(PyDict_CheckExact(orig));
|
||||||
|
assert(orig->ma_values == NULL);
|
||||||
|
assert(orig->ma_keys->dk_refcnt == 1);
|
||||||
|
|
||||||
|
Py_ssize_t keys_size = _PyDict_KeysSize(orig->ma_keys);
|
||||||
|
PyDictKeysObject *keys = PyObject_Malloc(keys_size);
|
||||||
|
if (keys == NULL) {
|
||||||
|
PyErr_NoMemory();
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
memcpy(keys, orig->ma_keys, keys_size);
|
||||||
|
|
||||||
|
/* After copying key/value pairs, we need to incref all
|
||||||
|
keys and values and they are about to be co-owned by a
|
||||||
|
new dict object. */
|
||||||
|
PyDictKeyEntry *ep0 = DK_ENTRIES(keys);
|
||||||
|
Py_ssize_t n = keys->dk_nentries;
|
||||||
|
for (Py_ssize_t i = 0; i < n; i++) {
|
||||||
|
PyDictKeyEntry *entry = &ep0[i];
|
||||||
|
PyObject *value = entry->me_value;
|
||||||
|
if (value != NULL) {
|
||||||
|
Py_INCREF(value);
|
||||||
|
Py_INCREF(entry->me_key);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
PyDictObject *new = (PyDictObject *)new_dict(keys, NULL);
|
||||||
|
if (new == NULL) {
|
||||||
|
/* In case of an error, `new_dict()` takes care of
|
||||||
|
cleaning up `keys`. */
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
new->ma_used = orig->ma_used;
|
||||||
|
assert(_PyDict_CheckConsistency(new));
|
||||||
|
if (_PyObject_GC_IS_TRACKED(orig)) {
|
||||||
|
/* Maintain tracking. */
|
||||||
|
_PyObject_GC_TRACK(new);
|
||||||
|
}
|
||||||
|
return (PyObject *)new;
|
||||||
|
}
|
||||||
|
|
||||||
PyObject *
|
PyObject *
|
||||||
PyDict_New(void)
|
PyDict_New(void)
|
||||||
{
|
{
|
||||||
|
@ -2484,7 +2530,13 @@ PyDict_Copy(PyObject *o)
|
||||||
PyErr_BadInternalCall();
|
PyErr_BadInternalCall();
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
mp = (PyDictObject *)o;
|
mp = (PyDictObject *)o;
|
||||||
|
if (mp->ma_used == 0) {
|
||||||
|
/* The dict is empty; just return a new dict. */
|
||||||
|
return PyDict_New();
|
||||||
|
}
|
||||||
|
|
||||||
if (_PyDict_HasSplitTable(mp)) {
|
if (_PyDict_HasSplitTable(mp)) {
|
||||||
PyDictObject *split_copy;
|
PyDictObject *split_copy;
|
||||||
Py_ssize_t size = USABLE_FRACTION(DK_SIZE(mp->ma_keys));
|
Py_ssize_t size = USABLE_FRACTION(DK_SIZE(mp->ma_keys));
|
||||||
|
@ -2510,6 +2562,27 @@ PyDict_Copy(PyObject *o)
|
||||||
_PyObject_GC_TRACK(split_copy);
|
_PyObject_GC_TRACK(split_copy);
|
||||||
return (PyObject *)split_copy;
|
return (PyObject *)split_copy;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (PyDict_CheckExact(mp) && mp->ma_values == NULL &&
|
||||||
|
(mp->ma_used >= (mp->ma_keys->dk_nentries * 2) / 3))
|
||||||
|
{
|
||||||
|
/* Use fast-copy if:
|
||||||
|
|
||||||
|
(1) 'mp' is an instance of a subclassed dict; and
|
||||||
|
|
||||||
|
(2) 'mp' is not a split-dict; and
|
||||||
|
|
||||||
|
(3) if 'mp' is non-compact ('del' operation does not resize dicts),
|
||||||
|
do fast-copy only if it has at most 1/3 non-used keys.
|
||||||
|
|
||||||
|
The last condition (3) is important to guard against a pathalogical
|
||||||
|
case when a large dict is almost emptied with multiple del/pop
|
||||||
|
operations and copied after that. In cases like this, we defer to
|
||||||
|
PyDict_Merge, which produces a compacted copy.
|
||||||
|
*/
|
||||||
|
return clone_combined_dict(mp);
|
||||||
|
}
|
||||||
|
|
||||||
copy = PyDict_New();
|
copy = PyDict_New();
|
||||||
if (copy == NULL)
|
if (copy == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
Loading…
Reference in New Issue