Implement compact dict

Issue #27350: The `dict` implementation has been changed to follow PyPy's
design. It is more compact and preserves insertion order.

_PyDict_Dummy() function has been removed.

Disable test_gdb: python-gdb.py has not yet been updated for the new structure
of compact dictionaries (issue #28023).

Patch written by INADA Naoki.
This commit is contained in:
Victor Stinner 2016-09-07 17:40:12 -07:00
parent d8b7770a0e
commit 742da040db
12 changed files with 793 additions and 569 deletions

View File

@ -343,6 +343,11 @@ Other Language Changes
Some smaller changes made to the core Python language are: Some smaller changes made to the core Python language are:
* The `dict` implementation has been changed to follow PyPy's design. It is more
compact and preserves insertion order. As a result, :pep:`468` (Preserving the
order of ``**kwargs`` in a function) is implemented.
(Contributed by INADA Naoki in :issue:`27350`.)
* Long sequences of repeated traceback lines are now abbreviated as * Long sequences of repeated traceback lines are now abbreviated as
``"[Previous line repeated {count} more times]"`` (see ``"[Previous line repeated {count} more times]"`` (see
:ref:`py36-traceback` for an example). :ref:`py36-traceback` for an example).

View File

@ -710,7 +710,6 @@ you can count such references to the type object.)
PyAPI_DATA(Py_ssize_t) _Py_RefTotal; PyAPI_DATA(Py_ssize_t) _Py_RefTotal;
PyAPI_FUNC(void) _Py_NegativeRefcount(const char *fname, PyAPI_FUNC(void) _Py_NegativeRefcount(const char *fname,
int lineno, PyObject *op); int lineno, PyObject *op);
PyAPI_FUNC(PyObject *) _PyDict_Dummy(void);
PyAPI_FUNC(Py_ssize_t) _Py_GetRefTotal(void); PyAPI_FUNC(Py_ssize_t) _Py_GetRefTotal(void);
#define _Py_INC_REFTOTAL _Py_RefTotal++ #define _Py_INC_REFTOTAL _Py_RefTotal++
#define _Py_DEC_REFTOTAL _Py_RefTotal-- #define _Py_DEC_REFTOTAL _Py_RefTotal--

View File

@ -5116,12 +5116,14 @@ class SharedKeyTests(unittest.TestCase):
a, b = A(), B() a, b = A(), B()
self.assertEqual(sys.getsizeof(vars(a)), sys.getsizeof(vars(b))) self.assertEqual(sys.getsizeof(vars(a)), sys.getsizeof(vars(b)))
self.assertLess(sys.getsizeof(vars(a)), sys.getsizeof({})) self.assertLess(sys.getsizeof(vars(a)), sys.getsizeof({}))
a.x, a.y, a.z, a.w = range(4) # Initial hash table can contain at most 5 elements.
# Set 6 attributes to cause internal resizing.
a.x, a.y, a.z, a.w, a.v, a.u = range(6)
self.assertNotEqual(sys.getsizeof(vars(a)), sys.getsizeof(vars(b))) self.assertNotEqual(sys.getsizeof(vars(a)), sys.getsizeof(vars(b)))
a2 = A() a2 = A()
self.assertEqual(sys.getsizeof(vars(a)), sys.getsizeof(vars(a2))) self.assertEqual(sys.getsizeof(vars(a)), sys.getsizeof(vars(a2)))
self.assertLess(sys.getsizeof(vars(a)), sys.getsizeof({})) self.assertLess(sys.getsizeof(vars(a)), sys.getsizeof({}))
b.u, b.v, b.w, b.t = range(4) b.u, b.v, b.w, b.t, b.s, b.r = range(6)
self.assertLess(sys.getsizeof(vars(b)), sys.getsizeof({})) self.assertLess(sys.getsizeof(vars(b)), sys.getsizeof({}))

View File

@ -11,6 +11,9 @@ import sysconfig
import unittest import unittest
import locale import locale
# FIXME: issue #28023
raise unittest.SkipTest("FIXME: issue #28023, compact dict (issue #27350) broke python-gdb.py")
# Is this Python configured to support threads? # Is this Python configured to support threads?
try: try:
import _thread import _thread

View File

@ -1,3 +1,4 @@
import builtins
import contextlib import contextlib
import copy import copy
import gc import gc
@ -621,6 +622,25 @@ class PurePythonOrderedDictTests(OrderedDictTests, unittest.TestCase):
OrderedDict = py_coll.OrderedDict OrderedDict = py_coll.OrderedDict
class CPythonBuiltinDictTests(unittest.TestCase):
"""Builtin dict preserves insertion order.
Reuse some of tests in OrderedDict selectively.
"""
module = builtins
OrderedDict = dict
for method in (
"test_init test_update test_abc test_clear test_delitem " +
"test_setitem test_detect_deletion_during_iteration " +
"test_popitem test_reinsert test_override_update " +
"test_highly_nested test_highly_nested_subclass " +
"test_delitem_hash_collision ").split():
setattr(CPythonBuiltinDictTests, method, getattr(OrderedDictTests, method))
del method
@unittest.skipUnless(c_coll, 'requires the C version of the collections module') @unittest.skipUnless(c_coll, 'requires the C version of the collections module')
class CPythonOrderedDictTests(OrderedDictTests, unittest.TestCase): class CPythonOrderedDictTests(OrderedDictTests, unittest.TestCase):
@ -635,18 +655,19 @@ class CPythonOrderedDictTests(OrderedDictTests, unittest.TestCase):
size = support.calcobjsize size = support.calcobjsize
check = self.check_sizeof check = self.check_sizeof
basicsize = size('n2P' + '3PnPn2P') + calcsize('2nPn') basicsize = size('n2P' + '3PnPn2P') + calcsize('2nP2n')
entrysize = calcsize('n2P') + calcsize('P') entrysize = calcsize('n2P')
p = calcsize('P')
nodesize = calcsize('Pn2P') nodesize = calcsize('Pn2P')
od = OrderedDict() od = OrderedDict()
check(od, basicsize + 8*entrysize) check(od, basicsize + 8*p + 8 + 5*entrysize) # 8 bytes of indices + (8*2//3) entries in the entry table
od.x = 1 od.x = 1
check(od, basicsize + 8*entrysize) check(od, basicsize + 8*p + 8 + 5*entrysize)
od.update([(i, i) for i in range(3)]) od.update([(i, i) for i in range(3)])
check(od, basicsize + 8*entrysize + 3*nodesize) check(od, basicsize + 8*p + 8 + 5*entrysize + 3*nodesize)
od.update([(i, i) for i in range(3, 10)]) od.update([(i, i) for i in range(3, 10)])
check(od, basicsize + 16*entrysize + 10*nodesize) check(od, basicsize + 16*p + 16 + 10*entrysize + 10*nodesize)
check(od.keys(), size('P')) check(od.keys(), size('P'))
check(od.items(), size('P')) check(od.items(), size('P'))

View File

@ -936,9 +936,9 @@ class SizeofTest(unittest.TestCase):
# method-wrapper (descriptor object) # method-wrapper (descriptor object)
check({}.__iter__, size('2P')) check({}.__iter__, size('2P'))
# dict # dict
check({}, size('n2P') + calcsize('2nPn') + 8*calcsize('n2P')) check({}, size('n2P') + calcsize('2nP2n') + 8 + (8*2//3)*calcsize('n2P'))
longdict = {1:1, 2:2, 3:3, 4:4, 5:5, 6:6, 7:7, 8:8} longdict = {1:1, 2:2, 3:3, 4:4, 5:5, 6:6, 7:7, 8:8}
check(longdict, size('n2P') + calcsize('2nPn') + 16*calcsize('n2P')) check(longdict, size('n2P') + calcsize('2nP2n') + 16 + (16*2//3)*calcsize('n2P'))
# dictionary-keyview # dictionary-keyview
check({}.keys(), size('P')) check({}.keys(), size('P'))
# dictionary-valueview # dictionary-valueview
@ -1096,13 +1096,13 @@ class SizeofTest(unittest.TestCase):
'10P' # PySequenceMethods '10P' # PySequenceMethods
'2P' # PyBufferProcs '2P' # PyBufferProcs
'4P') '4P')
# Separate block for PyDictKeysObject with 4 entries # Separate block for PyDictKeysObject with 8 keys and 5 entries
s += calcsize("2nPn") + 4*calcsize("n2P") s += calcsize("2nP2n") + 8 + 5*calcsize("n2P")
# class # class
class newstyleclass(object): pass class newstyleclass(object): pass
check(newstyleclass, s) check(newstyleclass, s)
# dict with shared keys # dict with shared keys
check(newstyleclass().__dict__, size('n2P' + '2nPn')) check(newstyleclass().__dict__, size('n2P' + '2nP2n'))
# unicode # unicode
# each tuple contains a string and its expected character size # each tuple contains a string and its expected character size
# don't put any static strings here, as they may contain # don't put any static strings here, as they may contain

View File

@ -1325,13 +1325,16 @@ class MappingTestCase(TestBase):
o = Object(123456) o = Object(123456)
with testcontext(): with testcontext():
n = len(dict) n = len(dict)
dict.popitem() # Since the underlying dict is ordered, the first item is popped
dict.pop(next(dict.keys()))
self.assertEqual(len(dict), n - 1) self.assertEqual(len(dict), n - 1)
dict[o] = o dict[o] = o
self.assertEqual(len(dict), n) self.assertEqual(len(dict), n)
# last item in objects is removed from dict in context shutdown
with testcontext(): with testcontext():
self.assertEqual(len(dict), n - 1) self.assertEqual(len(dict), n - 1)
dict.pop(next(dict.keys())) # Then, (o, o) is popped
dict.popitem()
self.assertEqual(len(dict), n - 2) self.assertEqual(len(dict), n - 2)
with testcontext(): with testcontext():
self.assertEqual(len(dict), n - 3) self.assertEqual(len(dict), n - 3)

View File

@ -10,6 +10,9 @@ What's New in Python 3.6.0 beta 1
Core and Builtins Core and Builtins
----------------- -----------------
- Issue #27350: The `dict` implementation has been changed to follow PyPy's
design. It is more compact and preserves insertion order.
- Issue #27911: Remove unnecessary error checks in - Issue #27911: Remove unnecessary error checks in
``exec_builtin_or_dynamic()``. ``exec_builtin_or_dynamic()``.

View File

@ -8,15 +8,25 @@ typedef struct {
PyObject *me_value; /* This field is only meaningful for combined tables */ PyObject *me_value; /* This field is only meaningful for combined tables */
} PyDictKeyEntry; } PyDictKeyEntry;
typedef PyDictKeyEntry *(*dict_lookup_func) /* dict_lookup_func() returns index of entry which can be used like DK_ENTRIES(dk)[index].
(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject ***value_addr); * -1 when no entry found, -3 when compare raises error.
*/
typedef Py_ssize_t (*dict_lookup_func)
(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject ***value_addr,
Py_ssize_t *hashpos);
#define DKIX_EMPTY (-1)
#define DKIX_DUMMY (-2) /* Used internally */
#define DKIX_ERROR (-3)
/* See dictobject.c for actual layout of DictKeysObject */
struct _dictkeysobject { struct _dictkeysobject {
Py_ssize_t dk_refcnt; Py_ssize_t dk_refcnt;
Py_ssize_t dk_size; Py_ssize_t dk_size;
dict_lookup_func dk_lookup; dict_lookup_func dk_lookup;
Py_ssize_t dk_usable; Py_ssize_t dk_usable;
PyDictKeyEntry dk_entries[1]; Py_ssize_t dk_nentries; /* How many entries are used. */
char dk_indices[8]; /* dynamically sized. 8 is minimum. */
}; };
#endif #endif

File diff suppressed because it is too large Load Diff

View File

@ -22,12 +22,6 @@ _Py_GetRefTotal(void)
{ {
PyObject *o; PyObject *o;
Py_ssize_t total = _Py_RefTotal; Py_ssize_t total = _Py_RefTotal;
/* ignore the references to the dummy object of the dicts and sets
because they are not reliable and not useful (now that the
hash table code is well-tested) */
o = _PyDict_Dummy();
if (o != NULL)
total -= o->ob_refcnt;
o = _PySet_Dummy; o = _PySet_Dummy;
if (o != NULL) if (o != NULL)
total -= o->ob_refcnt; total -= o->ob_refcnt;

View File

@ -536,14 +536,17 @@ static Py_ssize_t
_odict_get_index_raw(PyODictObject *od, PyObject *key, Py_hash_t hash) _odict_get_index_raw(PyODictObject *od, PyObject *key, Py_hash_t hash)
{ {
PyObject **value_addr = NULL; PyObject **value_addr = NULL;
PyDictKeyEntry *ep;
PyDictKeysObject *keys = ((PyDictObject *)od)->ma_keys; PyDictKeysObject *keys = ((PyDictObject *)od)->ma_keys;
Py_ssize_t ix;
ep = (keys->dk_lookup)((PyDictObject *)od, key, hash, &value_addr); ix = (keys->dk_lookup)((PyDictObject *)od, key, hash, &value_addr, NULL);
if (ep == NULL) if (ix == DKIX_EMPTY) {
return keys->dk_nentries; /* index of new entry */
}
if (ix < 0)
return -1; return -1;
/* We use pointer arithmetic to get the entry's index into the table. */ /* We use pointer arithmetic to get the entry's index into the table. */
return ep - keys->dk_entries; return ix;
} }
/* Replace od->od_fast_nodes with a new table matching the size of dict's. */ /* Replace od->od_fast_nodes with a new table matching the size of dict's. */
@ -565,7 +568,7 @@ _odict_resize(PyODictObject *od) {
/* Copy the current nodes into the table. */ /* Copy the current nodes into the table. */
_odict_FOREACH(od, node) { _odict_FOREACH(od, node) {
i = _odict_get_index_raw(od, _odictnode_KEY(node), i = _odict_get_index_raw(od, _odictnode_KEY(node),
_odictnode_HASH(node)); _odictnode_HASH(node));
if (i < 0) { if (i < 0) {
PyMem_FREE(fast_nodes); PyMem_FREE(fast_nodes);
return -1; return -1;