Implement compact dict
Issue #27350: `dict` implementation is changed like PyPy. It is more compact and preserves insertion order. _PyDict_Dummy() function has been removed. Disable test_gdb: python-gdb.py is not updated yet to the new structure of compact dictionaries (issue #28023). Patch written by INADA Naoki.
This commit is contained in:
parent
d8b7770a0e
commit
742da040db
|
@ -343,6 +343,11 @@ Other Language Changes
|
||||||
|
|
||||||
Some smaller changes made to the core Python language are:
|
Some smaller changes made to the core Python language are:
|
||||||
|
|
||||||
|
* The `dict` implementation has been changed to be like PyPy's. It is more compact and preserves
|
||||||
|
insertion order. :pep:`468` (Preserving the order of ``**kwargs`` in a
|
||||||
|
function.) is implemented by this.
|
||||||
|
(Contributed by INADA Naoki in :issue:`27350`.)
|
||||||
|
|
||||||
* Long sequences of repeated traceback lines are now abbreviated as
|
* Long sequences of repeated traceback lines are now abbreviated as
|
||||||
``"[Previous line repeated {count} more times]"`` (see
|
``"[Previous line repeated {count} more times]"`` (see
|
||||||
:ref:`py36-traceback` for an example).
|
:ref:`py36-traceback` for an example).
|
||||||
|
|
|
@ -710,7 +710,6 @@ you can count such references to the type object.)
|
||||||
PyAPI_DATA(Py_ssize_t) _Py_RefTotal;
|
PyAPI_DATA(Py_ssize_t) _Py_RefTotal;
|
||||||
PyAPI_FUNC(void) _Py_NegativeRefcount(const char *fname,
|
PyAPI_FUNC(void) _Py_NegativeRefcount(const char *fname,
|
||||||
int lineno, PyObject *op);
|
int lineno, PyObject *op);
|
||||||
PyAPI_FUNC(PyObject *) _PyDict_Dummy(void);
|
|
||||||
PyAPI_FUNC(Py_ssize_t) _Py_GetRefTotal(void);
|
PyAPI_FUNC(Py_ssize_t) _Py_GetRefTotal(void);
|
||||||
#define _Py_INC_REFTOTAL _Py_RefTotal++
|
#define _Py_INC_REFTOTAL _Py_RefTotal++
|
||||||
#define _Py_DEC_REFTOTAL _Py_RefTotal--
|
#define _Py_DEC_REFTOTAL _Py_RefTotal--
|
||||||
|
|
|
@ -5116,12 +5116,14 @@ class SharedKeyTests(unittest.TestCase):
|
||||||
a, b = A(), B()
|
a, b = A(), B()
|
||||||
self.assertEqual(sys.getsizeof(vars(a)), sys.getsizeof(vars(b)))
|
self.assertEqual(sys.getsizeof(vars(a)), sys.getsizeof(vars(b)))
|
||||||
self.assertLess(sys.getsizeof(vars(a)), sys.getsizeof({}))
|
self.assertLess(sys.getsizeof(vars(a)), sys.getsizeof({}))
|
||||||
a.x, a.y, a.z, a.w = range(4)
|
# Initial hash table can contain at most 5 elements.
|
||||||
|
# Set 6 attributes to cause internal resizing.
|
||||||
|
a.x, a.y, a.z, a.w, a.v, a.u = range(6)
|
||||||
self.assertNotEqual(sys.getsizeof(vars(a)), sys.getsizeof(vars(b)))
|
self.assertNotEqual(sys.getsizeof(vars(a)), sys.getsizeof(vars(b)))
|
||||||
a2 = A()
|
a2 = A()
|
||||||
self.assertEqual(sys.getsizeof(vars(a)), sys.getsizeof(vars(a2)))
|
self.assertEqual(sys.getsizeof(vars(a)), sys.getsizeof(vars(a2)))
|
||||||
self.assertLess(sys.getsizeof(vars(a)), sys.getsizeof({}))
|
self.assertLess(sys.getsizeof(vars(a)), sys.getsizeof({}))
|
||||||
b.u, b.v, b.w, b.t = range(4)
|
b.u, b.v, b.w, b.t, b.s, b.r = range(6)
|
||||||
self.assertLess(sys.getsizeof(vars(b)), sys.getsizeof({}))
|
self.assertLess(sys.getsizeof(vars(b)), sys.getsizeof({}))
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -11,6 +11,9 @@ import sysconfig
|
||||||
import unittest
|
import unittest
|
||||||
import locale
|
import locale
|
||||||
|
|
||||||
|
# FIXME: issue #28023
|
||||||
|
raise unittest.SkipTest("FIXME: issue #28023, compact dict (issue #27350) broke python-gdb.py")
|
||||||
|
|
||||||
# Is this Python configured to support threads?
|
# Is this Python configured to support threads?
|
||||||
try:
|
try:
|
||||||
import _thread
|
import _thread
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
import builtins
|
||||||
import contextlib
|
import contextlib
|
||||||
import copy
|
import copy
|
||||||
import gc
|
import gc
|
||||||
|
@ -621,6 +622,25 @@ class PurePythonOrderedDictTests(OrderedDictTests, unittest.TestCase):
|
||||||
OrderedDict = py_coll.OrderedDict
|
OrderedDict = py_coll.OrderedDict
|
||||||
|
|
||||||
|
|
||||||
|
class CPythonBuiltinDictTests(unittest.TestCase):
|
||||||
|
"""Builtin dict preserves insertion order.
|
||||||
|
|
||||||
|
Selectively reuse some of the OrderedDict tests.
|
||||||
|
"""
|
||||||
|
|
||||||
|
module = builtins
|
||||||
|
OrderedDict = dict
|
||||||
|
|
||||||
|
for method in (
|
||||||
|
"test_init test_update test_abc test_clear test_delitem " +
|
||||||
|
"test_setitem test_detect_deletion_during_iteration " +
|
||||||
|
"test_popitem test_reinsert test_override_update " +
|
||||||
|
"test_highly_nested test_highly_nested_subclass " +
|
||||||
|
"test_delitem_hash_collision ").split():
|
||||||
|
setattr(CPythonBuiltinDictTests, method, getattr(OrderedDictTests, method))
|
||||||
|
del method
|
||||||
|
|
||||||
|
|
||||||
@unittest.skipUnless(c_coll, 'requires the C version of the collections module')
|
@unittest.skipUnless(c_coll, 'requires the C version of the collections module')
|
||||||
class CPythonOrderedDictTests(OrderedDictTests, unittest.TestCase):
|
class CPythonOrderedDictTests(OrderedDictTests, unittest.TestCase):
|
||||||
|
|
||||||
|
@ -635,18 +655,19 @@ class CPythonOrderedDictTests(OrderedDictTests, unittest.TestCase):
|
||||||
size = support.calcobjsize
|
size = support.calcobjsize
|
||||||
check = self.check_sizeof
|
check = self.check_sizeof
|
||||||
|
|
||||||
basicsize = size('n2P' + '3PnPn2P') + calcsize('2nPn')
|
basicsize = size('n2P' + '3PnPn2P') + calcsize('2nP2n')
|
||||||
entrysize = calcsize('n2P') + calcsize('P')
|
entrysize = calcsize('n2P')
|
||||||
|
p = calcsize('P')
|
||||||
nodesize = calcsize('Pn2P')
|
nodesize = calcsize('Pn2P')
|
||||||
|
|
||||||
od = OrderedDict()
|
od = OrderedDict()
|
||||||
check(od, basicsize + 8*entrysize)
|
check(od, basicsize + 8*p + 8 + 5*entrysize) # 8-byte indices + 8*2//3 * entry table
|
||||||
od.x = 1
|
od.x = 1
|
||||||
check(od, basicsize + 8*entrysize)
|
check(od, basicsize + 8*p + 8 + 5*entrysize)
|
||||||
od.update([(i, i) for i in range(3)])
|
od.update([(i, i) for i in range(3)])
|
||||||
check(od, basicsize + 8*entrysize + 3*nodesize)
|
check(od, basicsize + 8*p + 8 + 5*entrysize + 3*nodesize)
|
||||||
od.update([(i, i) for i in range(3, 10)])
|
od.update([(i, i) for i in range(3, 10)])
|
||||||
check(od, basicsize + 16*entrysize + 10*nodesize)
|
check(od, basicsize + 16*p + 16 + 10*entrysize + 10*nodesize)
|
||||||
|
|
||||||
check(od.keys(), size('P'))
|
check(od.keys(), size('P'))
|
||||||
check(od.items(), size('P'))
|
check(od.items(), size('P'))
|
||||||
|
|
|
@ -936,9 +936,9 @@ class SizeofTest(unittest.TestCase):
|
||||||
# method-wrapper (descriptor object)
|
# method-wrapper (descriptor object)
|
||||||
check({}.__iter__, size('2P'))
|
check({}.__iter__, size('2P'))
|
||||||
# dict
|
# dict
|
||||||
check({}, size('n2P') + calcsize('2nPn') + 8*calcsize('n2P'))
|
check({}, size('n2P') + calcsize('2nP2n') + 8 + (8*2//3)*calcsize('n2P'))
|
||||||
longdict = {1:1, 2:2, 3:3, 4:4, 5:5, 6:6, 7:7, 8:8}
|
longdict = {1:1, 2:2, 3:3, 4:4, 5:5, 6:6, 7:7, 8:8}
|
||||||
check(longdict, size('n2P') + calcsize('2nPn') + 16*calcsize('n2P'))
|
check(longdict, size('n2P') + calcsize('2nP2n') + 16 + (16*2//3)*calcsize('n2P'))
|
||||||
# dictionary-keyview
|
# dictionary-keyview
|
||||||
check({}.keys(), size('P'))
|
check({}.keys(), size('P'))
|
||||||
# dictionary-valueview
|
# dictionary-valueview
|
||||||
|
@ -1096,13 +1096,13 @@ class SizeofTest(unittest.TestCase):
|
||||||
'10P' # PySequenceMethods
|
'10P' # PySequenceMethods
|
||||||
'2P' # PyBufferProcs
|
'2P' # PyBufferProcs
|
||||||
'4P')
|
'4P')
|
||||||
# Separate block for PyDictKeysObject with 4 entries
|
# Separate block for PyDictKeysObject with 8 keys and 5 entries
|
||||||
s += calcsize("2nPn") + 4*calcsize("n2P")
|
s += calcsize("2nP2n") + 8 + 5*calcsize("n2P")
|
||||||
# class
|
# class
|
||||||
class newstyleclass(object): pass
|
class newstyleclass(object): pass
|
||||||
check(newstyleclass, s)
|
check(newstyleclass, s)
|
||||||
# dict with shared keys
|
# dict with shared keys
|
||||||
check(newstyleclass().__dict__, size('n2P' + '2nPn'))
|
check(newstyleclass().__dict__, size('n2P' + '2nP2n'))
|
||||||
# unicode
|
# unicode
|
||||||
# each tuple contains a string and its expected character size
|
# each tuple contains a string and its expected character size
|
||||||
# don't put any static strings here, as they may contain
|
# don't put any static strings here, as they may contain
|
||||||
|
|
|
@ -1325,13 +1325,16 @@ class MappingTestCase(TestBase):
|
||||||
o = Object(123456)
|
o = Object(123456)
|
||||||
with testcontext():
|
with testcontext():
|
||||||
n = len(dict)
|
n = len(dict)
|
||||||
dict.popitem()
|
# Since the underlying dict is ordered, the first item is popped
|
||||||
|
dict.pop(next(dict.keys()))
|
||||||
self.assertEqual(len(dict), n - 1)
|
self.assertEqual(len(dict), n - 1)
|
||||||
dict[o] = o
|
dict[o] = o
|
||||||
self.assertEqual(len(dict), n)
|
self.assertEqual(len(dict), n)
|
||||||
|
# last item in objects is removed from dict in context shutdown
|
||||||
with testcontext():
|
with testcontext():
|
||||||
self.assertEqual(len(dict), n - 1)
|
self.assertEqual(len(dict), n - 1)
|
||||||
dict.pop(next(dict.keys()))
|
# Then, (o, o) is popped
|
||||||
|
dict.popitem()
|
||||||
self.assertEqual(len(dict), n - 2)
|
self.assertEqual(len(dict), n - 2)
|
||||||
with testcontext():
|
with testcontext():
|
||||||
self.assertEqual(len(dict), n - 3)
|
self.assertEqual(len(dict), n - 3)
|
||||||
|
|
|
@ -10,6 +10,9 @@ What's New in Python 3.6.0 beta 1
|
||||||
Core and Builtins
|
Core and Builtins
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
|
- Issue #27350: The `dict` implementation has been changed to be like PyPy's. It is more compact
|
||||||
|
and preserves insertion order.
|
||||||
|
|
||||||
- Issue #27911: Remove unnecessary error checks in
|
- Issue #27911: Remove unnecessary error checks in
|
||||||
``exec_builtin_or_dynamic()``.
|
``exec_builtin_or_dynamic()``.
|
||||||
|
|
||||||
|
|
|
@ -8,15 +8,25 @@ typedef struct {
|
||||||
PyObject *me_value; /* This field is only meaningful for combined tables */
|
PyObject *me_value; /* This field is only meaningful for combined tables */
|
||||||
} PyDictKeyEntry;
|
} PyDictKeyEntry;
|
||||||
|
|
||||||
typedef PyDictKeyEntry *(*dict_lookup_func)
|
/* dict_lookup_func() returns index of entry which can be used like DK_ENTRIES(dk)[index].
|
||||||
(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject ***value_addr);
|
* -1 when no entry found, -3 when compare raises error.
|
||||||
|
*/
|
||||||
|
typedef Py_ssize_t (*dict_lookup_func)
|
||||||
|
(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject ***value_addr,
|
||||||
|
Py_ssize_t *hashpos);
|
||||||
|
|
||||||
|
#define DKIX_EMPTY (-1)
|
||||||
|
#define DKIX_DUMMY (-2) /* Used internally */
|
||||||
|
#define DKIX_ERROR (-3)
|
||||||
|
|
||||||
|
/* See dictobject.c for actual layout of DictKeysObject */
|
||||||
struct _dictkeysobject {
|
struct _dictkeysobject {
|
||||||
Py_ssize_t dk_refcnt;
|
Py_ssize_t dk_refcnt;
|
||||||
Py_ssize_t dk_size;
|
Py_ssize_t dk_size;
|
||||||
dict_lookup_func dk_lookup;
|
dict_lookup_func dk_lookup;
|
||||||
Py_ssize_t dk_usable;
|
Py_ssize_t dk_usable;
|
||||||
PyDictKeyEntry dk_entries[1];
|
Py_ssize_t dk_nentries; /* How many entries are used. */
|
||||||
|
char dk_indices[8]; /* dynamically sized. 8 is minimum. */
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
1259
Objects/dictobject.c
1259
Objects/dictobject.c
File diff suppressed because it is too large
Load Diff
|
@ -22,12 +22,6 @@ _Py_GetRefTotal(void)
|
||||||
{
|
{
|
||||||
PyObject *o;
|
PyObject *o;
|
||||||
Py_ssize_t total = _Py_RefTotal;
|
Py_ssize_t total = _Py_RefTotal;
|
||||||
/* ignore the references to the dummy object of the dicts and sets
|
|
||||||
because they are not reliable and not useful (now that the
|
|
||||||
hash table code is well-tested) */
|
|
||||||
o = _PyDict_Dummy();
|
|
||||||
if (o != NULL)
|
|
||||||
total -= o->ob_refcnt;
|
|
||||||
o = _PySet_Dummy;
|
o = _PySet_Dummy;
|
||||||
if (o != NULL)
|
if (o != NULL)
|
||||||
total -= o->ob_refcnt;
|
total -= o->ob_refcnt;
|
||||||
|
|
|
@ -536,14 +536,17 @@ static Py_ssize_t
|
||||||
_odict_get_index_raw(PyODictObject *od, PyObject *key, Py_hash_t hash)
|
_odict_get_index_raw(PyODictObject *od, PyObject *key, Py_hash_t hash)
|
||||||
{
|
{
|
||||||
PyObject **value_addr = NULL;
|
PyObject **value_addr = NULL;
|
||||||
PyDictKeyEntry *ep;
|
|
||||||
PyDictKeysObject *keys = ((PyDictObject *)od)->ma_keys;
|
PyDictKeysObject *keys = ((PyDictObject *)od)->ma_keys;
|
||||||
|
Py_ssize_t ix;
|
||||||
|
|
||||||
ep = (keys->dk_lookup)((PyDictObject *)od, key, hash, &value_addr);
|
ix = (keys->dk_lookup)((PyDictObject *)od, key, hash, &value_addr, NULL);
|
||||||
if (ep == NULL)
|
if (ix == DKIX_EMPTY) {
|
||||||
|
return keys->dk_nentries; /* index of new entry */
|
||||||
|
}
|
||||||
|
if (ix < 0)
|
||||||
return -1;
|
return -1;
|
||||||
/* We use pointer arithmetic to get the entry's index into the table. */
|
/* The lookup function already returned the entry's index into the table. */
|
||||||
return ep - keys->dk_entries;
|
return ix;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Replace od->od_fast_nodes with a new table matching the size of dict's. */
|
/* Replace od->od_fast_nodes with a new table matching the size of dict's. */
|
||||||
|
@ -565,7 +568,7 @@ _odict_resize(PyODictObject *od) {
|
||||||
/* Copy the current nodes into the table. */
|
/* Copy the current nodes into the table. */
|
||||||
_odict_FOREACH(od, node) {
|
_odict_FOREACH(od, node) {
|
||||||
i = _odict_get_index_raw(od, _odictnode_KEY(node),
|
i = _odict_get_index_raw(od, _odictnode_KEY(node),
|
||||||
_odictnode_HASH(node));
|
_odictnode_HASH(node));
|
||||||
if (i < 0) {
|
if (i < 0) {
|
||||||
PyMem_FREE(fast_nodes);
|
PyMem_FREE(fast_nodes);
|
||||||
return -1;
|
return -1;
|
||||||
|
|
Loading…
Reference in New Issue