Implement compact dict
Issue #27350: `dict` implementation is changed like PyPy. It is more compact and preserves insertion order. _PyDict_Dummy() function has been removed. Disable test_gdb: python-gdb.py is not updated yet to the new structure of compact dictionaries (issue #28023). Patch written by INADA Naoki.
This commit is contained in:
parent
d8b7770a0e
commit
742da040db
|
@ -343,6 +343,11 @@ Other Language Changes
|
|||
|
||||
Some smaller changes made to the core Python language are:
|
||||
|
||||
* The `dict` implementation is now modeled on PyPy's. It is more compact and preserves
|
||||
insertion order. :pep:`468` (Preserving the order of `**kwargs` in a
|
||||
function.) is implemented by this.
|
||||
(Contributed by INADA Naoki in :issue:`27350`.)
|
||||
|
||||
* Long sequences of repeated traceback lines are now abbreviated as
|
||||
``"[Previous line repeated {count} more times]"`` (see
|
||||
:ref:`py36-traceback` for an example).
|
||||
|
|
|
@ -710,7 +710,6 @@ you can count such references to the type object.)
|
|||
PyAPI_DATA(Py_ssize_t) _Py_RefTotal;
|
||||
PyAPI_FUNC(void) _Py_NegativeRefcount(const char *fname,
|
||||
int lineno, PyObject *op);
|
||||
PyAPI_FUNC(PyObject *) _PyDict_Dummy(void);
|
||||
PyAPI_FUNC(Py_ssize_t) _Py_GetRefTotal(void);
|
||||
#define _Py_INC_REFTOTAL _Py_RefTotal++
|
||||
#define _Py_DEC_REFTOTAL _Py_RefTotal--
|
||||
|
|
|
@ -5116,12 +5116,14 @@ class SharedKeyTests(unittest.TestCase):
|
|||
a, b = A(), B()
|
||||
self.assertEqual(sys.getsizeof(vars(a)), sys.getsizeof(vars(b)))
|
||||
self.assertLess(sys.getsizeof(vars(a)), sys.getsizeof({}))
|
||||
a.x, a.y, a.z, a.w = range(4)
|
||||
# Initial hash table can contain at most 5 elements.
|
||||
# Set 6 attributes to cause internal resizing.
|
||||
a.x, a.y, a.z, a.w, a.v, a.u = range(6)
|
||||
self.assertNotEqual(sys.getsizeof(vars(a)), sys.getsizeof(vars(b)))
|
||||
a2 = A()
|
||||
self.assertEqual(sys.getsizeof(vars(a)), sys.getsizeof(vars(a2)))
|
||||
self.assertLess(sys.getsizeof(vars(a)), sys.getsizeof({}))
|
||||
b.u, b.v, b.w, b.t = range(4)
|
||||
b.u, b.v, b.w, b.t, b.s, b.r = range(6)
|
||||
self.assertLess(sys.getsizeof(vars(b)), sys.getsizeof({}))
|
||||
|
||||
|
||||
|
|
|
@ -11,6 +11,9 @@ import sysconfig
|
|||
import unittest
|
||||
import locale
|
||||
|
||||
# FIXME: issue #28023
|
||||
raise unittest.SkipTest("FIXME: issue #28023, compact dict (issue #27350) broke python-gdb.py")
|
||||
|
||||
# Is this Python configured to support threads?
|
||||
try:
|
||||
import _thread
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
import builtins
|
||||
import contextlib
|
||||
import copy
|
||||
import gc
|
||||
|
@ -621,6 +622,25 @@ class PurePythonOrderedDictTests(OrderedDictTests, unittest.TestCase):
|
|||
OrderedDict = py_coll.OrderedDict
|
||||
|
||||
|
||||
class CPythonBuiltinDictTests(unittest.TestCase):
|
||||
"""Builtin dict preserves insertion order.
|
||||
|
||||
Selectively reuse some of the tests from OrderedDictTests.
|
||||
"""
|
||||
|
||||
module = builtins
|
||||
OrderedDict = dict
|
||||
|
||||
for method in (
|
||||
"test_init test_update test_abc test_clear test_delitem " +
|
||||
"test_setitem test_detect_deletion_during_iteration " +
|
||||
"test_popitem test_reinsert test_override_update " +
|
||||
"test_highly_nested test_highly_nested_subclass " +
|
||||
"test_delitem_hash_collision ").split():
|
||||
setattr(CPythonBuiltinDictTests, method, getattr(OrderedDictTests, method))
|
||||
del method
|
||||
|
||||
|
||||
@unittest.skipUnless(c_coll, 'requires the C version of the collections module')
|
||||
class CPythonOrderedDictTests(OrderedDictTests, unittest.TestCase):
|
||||
|
||||
|
@ -635,18 +655,19 @@ class CPythonOrderedDictTests(OrderedDictTests, unittest.TestCase):
|
|||
size = support.calcobjsize
|
||||
check = self.check_sizeof
|
||||
|
||||
basicsize = size('n2P' + '3PnPn2P') + calcsize('2nPn')
|
||||
entrysize = calcsize('n2P') + calcsize('P')
|
||||
basicsize = size('n2P' + '3PnPn2P') + calcsize('2nP2n')
|
||||
entrysize = calcsize('n2P')
|
||||
p = calcsize('P')
|
||||
nodesize = calcsize('Pn2P')
|
||||
|
||||
od = OrderedDict()
|
||||
check(od, basicsize + 8*entrysize)
|
||||
check(od, basicsize + 8*p + 8 + 5*entrysize) # 8-byte indices + 8*2//3 * entry table
|
||||
od.x = 1
|
||||
check(od, basicsize + 8*entrysize)
|
||||
check(od, basicsize + 8*p + 8 + 5*entrysize)
|
||||
od.update([(i, i) for i in range(3)])
|
||||
check(od, basicsize + 8*entrysize + 3*nodesize)
|
||||
check(od, basicsize + 8*p + 8 + 5*entrysize + 3*nodesize)
|
||||
od.update([(i, i) for i in range(3, 10)])
|
||||
check(od, basicsize + 16*entrysize + 10*nodesize)
|
||||
check(od, basicsize + 16*p + 16 + 10*entrysize + 10*nodesize)
|
||||
|
||||
check(od.keys(), size('P'))
|
||||
check(od.items(), size('P'))
|
||||
|
|
|
@ -936,9 +936,9 @@ class SizeofTest(unittest.TestCase):
|
|||
# method-wrapper (descriptor object)
|
||||
check({}.__iter__, size('2P'))
|
||||
# dict
|
||||
check({}, size('n2P') + calcsize('2nPn') + 8*calcsize('n2P'))
|
||||
check({}, size('n2P') + calcsize('2nP2n') + 8 + (8*2//3)*calcsize('n2P'))
|
||||
longdict = {1:1, 2:2, 3:3, 4:4, 5:5, 6:6, 7:7, 8:8}
|
||||
check(longdict, size('n2P') + calcsize('2nPn') + 16*calcsize('n2P'))
|
||||
check(longdict, size('n2P') + calcsize('2nP2n') + 16 + (16*2//3)*calcsize('n2P'))
|
||||
# dictionary-keyview
|
||||
check({}.keys(), size('P'))
|
||||
# dictionary-valueview
|
||||
|
@ -1096,13 +1096,13 @@ class SizeofTest(unittest.TestCase):
|
|||
'10P' # PySequenceMethods
|
||||
'2P' # PyBufferProcs
|
||||
'4P')
|
||||
# Separate block for PyDictKeysObject with 4 entries
|
||||
s += calcsize("2nPn") + 4*calcsize("n2P")
|
||||
# Separate block for PyDictKeysObject with 8 keys and 5 entries
|
||||
s += calcsize("2nP2n") + 8 + 5*calcsize("n2P")
|
||||
# class
|
||||
class newstyleclass(object): pass
|
||||
check(newstyleclass, s)
|
||||
# dict with shared keys
|
||||
check(newstyleclass().__dict__, size('n2P' + '2nPn'))
|
||||
check(newstyleclass().__dict__, size('n2P' + '2nP2n'))
|
||||
# unicode
|
||||
# each tuple contains a string and its expected character size
|
||||
# don't put any static strings here, as they may contain
|
||||
|
|
|
@ -1325,13 +1325,16 @@ class MappingTestCase(TestBase):
|
|||
o = Object(123456)
|
||||
with testcontext():
|
||||
n = len(dict)
|
||||
dict.popitem()
|
||||
# Since the underlying dict is ordered, the first item is popped
|
||||
dict.pop(next(dict.keys()))
|
||||
self.assertEqual(len(dict), n - 1)
|
||||
dict[o] = o
|
||||
self.assertEqual(len(dict), n)
|
||||
# last item in objects is removed from dict in context shutdown
|
||||
with testcontext():
|
||||
self.assertEqual(len(dict), n - 1)
|
||||
dict.pop(next(dict.keys()))
|
||||
# Then, (o, o) is popped
|
||||
dict.popitem()
|
||||
self.assertEqual(len(dict), n - 2)
|
||||
with testcontext():
|
||||
self.assertEqual(len(dict), n - 3)
|
||||
|
|
|
@ -10,6 +10,9 @@ What's New in Python 3.6.0 beta 1
|
|||
Core and Builtins
|
||||
-----------------
|
||||
|
||||
- Issue #27350: The `dict` implementation is now modeled on PyPy's. It is more compact
|
||||
and preserves insertion order.
|
||||
|
||||
- Issue #27911: Remove unnecessary error checks in
|
||||
``exec_builtin_or_dynamic()``.
|
||||
|
||||
|
|
|
@ -8,15 +8,25 @@ typedef struct {
|
|||
PyObject *me_value; /* This field is only meaningful for combined tables */
|
||||
} PyDictKeyEntry;
|
||||
|
||||
typedef PyDictKeyEntry *(*dict_lookup_func)
|
||||
(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject ***value_addr);
|
||||
/* dict_lookup_func() returns the index of the entry, which can be used like DK_ENTRIES(dk)[index].
|
||||
* It returns DKIX_EMPTY (-1) when no entry is found, and DKIX_ERROR (-3) when a comparison raises an error.
|
||||
*/
|
||||
typedef Py_ssize_t (*dict_lookup_func)
|
||||
(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject ***value_addr,
|
||||
Py_ssize_t *hashpos);
|
||||
|
||||
#define DKIX_EMPTY (-1)
|
||||
#define DKIX_DUMMY (-2) /* Used internally */
|
||||
#define DKIX_ERROR (-3)
|
||||
|
||||
/* See dictobject.c for actual layout of DictKeysObject */
|
||||
struct _dictkeysobject {
|
||||
Py_ssize_t dk_refcnt;
|
||||
Py_ssize_t dk_size;
|
||||
dict_lookup_func dk_lookup;
|
||||
Py_ssize_t dk_usable;
|
||||
PyDictKeyEntry dk_entries[1];
|
||||
Py_ssize_t dk_nentries; /* How many entries are used. */
|
||||
char dk_indices[8]; /* dynamically sized. 8 is minimum. */
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
1259
Objects/dictobject.c
1259
Objects/dictobject.c
File diff suppressed because it is too large
Load Diff
|
@ -22,12 +22,6 @@ _Py_GetRefTotal(void)
|
|||
{
|
||||
PyObject *o;
|
||||
Py_ssize_t total = _Py_RefTotal;
|
||||
/* ignore the references to the dummy object of the dicts and sets
|
||||
because they are not reliable and not useful (now that the
|
||||
hash table code is well-tested) */
|
||||
o = _PyDict_Dummy();
|
||||
if (o != NULL)
|
||||
total -= o->ob_refcnt;
|
||||
o = _PySet_Dummy;
|
||||
if (o != NULL)
|
||||
total -= o->ob_refcnt;
|
||||
|
|
|
@ -536,14 +536,17 @@ static Py_ssize_t
|
|||
_odict_get_index_raw(PyODictObject *od, PyObject *key, Py_hash_t hash)
|
||||
{
|
||||
PyObject **value_addr = NULL;
|
||||
PyDictKeyEntry *ep;
|
||||
PyDictKeysObject *keys = ((PyDictObject *)od)->ma_keys;
|
||||
Py_ssize_t ix;
|
||||
|
||||
ep = (keys->dk_lookup)((PyDictObject *)od, key, hash, &value_addr);
|
||||
if (ep == NULL)
|
||||
ix = (keys->dk_lookup)((PyDictObject *)od, key, hash, &value_addr, NULL);
|
||||
if (ix == DKIX_EMPTY) {
|
||||
return keys->dk_nentries; /* index of new entry */
|
||||
}
|
||||
if (ix < 0)
|
||||
return -1;
|
||||
/* We use pointer arithmetic to get the entry's index into the table. */
|
||||
return ep - keys->dk_entries;
|
||||
return ix;
|
||||
}
|
||||
|
||||
/* Replace od->od_fast_nodes with a new table matching the size of dict's. */
|
||||
|
@ -565,7 +568,7 @@ _odict_resize(PyODictObject *od) {
|
|||
/* Copy the current nodes into the table. */
|
||||
_odict_FOREACH(od, node) {
|
||||
i = _odict_get_index_raw(od, _odictnode_KEY(node),
|
||||
_odictnode_HASH(node));
|
||||
_odictnode_HASH(node));
|
||||
if (i < 0) {
|
||||
PyMem_FREE(fast_nodes);
|
||||
return -1;
|
||||
|
|
Loading…
Reference in New Issue