Implement PEP 412: Key-sharing dictionaries (closes #13903)
Patch from Mark Shannon.
This commit is contained in:
parent
80d07f8251
commit
7d95e40721
|
@ -13,78 +13,20 @@ extern "C" {
|
|||
tuning dictionaries, and several ideas for possible optimizations.
|
||||
*/
|
||||
|
||||
/*
|
||||
There are three kinds of slots in the table:
|
||||
|
||||
1. Unused. me_key == me_value == NULL
|
||||
Does not hold an active (key, value) pair now and never did. Unused can
|
||||
transition to Active upon key insertion. This is the only case in which
|
||||
me_key is NULL, and is each slot's initial state.
|
||||
|
||||
2. Active. me_key != NULL and me_key != dummy and me_value != NULL
|
||||
Holds an active (key, value) pair. Active can transition to Dummy upon
|
||||
key deletion. This is the only case in which me_value != NULL.
|
||||
|
||||
3. Dummy. me_key == dummy and me_value == NULL
|
||||
Previously held an active (key, value) pair, but that was deleted and an
|
||||
active pair has not yet overwritten the slot. Dummy can transition to
|
||||
Active upon key insertion. Dummy slots cannot be made Unused again
|
||||
(cannot have me_key set to NULL), else the probe sequence in case of
|
||||
collision would have no way to know they were once active.
|
||||
|
||||
Note: .popitem() abuses the me_hash field of an Unused or Dummy slot to
|
||||
hold a search finger. The me_hash field of Unused or Dummy slots has no
|
||||
meaning otherwise.
|
||||
*/
|
||||
|
||||
/* PyDict_MINSIZE is the minimum size of a dictionary. This many slots are
|
||||
* allocated directly in the dict object (in the ma_smalltable member).
|
||||
* It must be a power of 2, and at least 4. 8 allows dicts with no more
|
||||
* than 5 active entries to live in ma_smalltable (and so avoid an
|
||||
* additional malloc); instrumentation suggested this suffices for the
|
||||
* majority of dicts (consisting mostly of usually-small instance dicts and
|
||||
* usually-small dicts created to pass keyword arguments).
|
||||
*/
|
||||
#ifndef Py_LIMITED_API
|
||||
#define PyDict_MINSIZE 8
|
||||
|
||||
typedef struct _dictkeysobject PyDictKeysObject;
|
||||
|
||||
/* The ma_values pointer is NULL for a combined table
|
||||
* or points to an array of PyObject* for a split table
|
||||
*/
|
||||
typedef struct {
|
||||
/* Cached hash code of me_key. */
|
||||
Py_hash_t me_hash;
|
||||
PyObject *me_key;
|
||||
PyObject *me_value;
|
||||
} PyDictEntry;
|
||||
|
||||
/*
|
||||
To ensure the lookup algorithm terminates, there must be at least one Unused
|
||||
slot (NULL key) in the table.
|
||||
The value ma_fill is the number of non-NULL keys (sum of Active and Dummy);
|
||||
ma_used is the number of non-NULL, non-dummy keys (== the number of non-NULL
|
||||
values == the number of Active items).
|
||||
To avoid slowing down lookups on a near-full table, we resize the table when
|
||||
it's two-thirds full.
|
||||
*/
|
||||
typedef struct _dictobject PyDictObject;
|
||||
struct _dictobject {
|
||||
PyObject_HEAD
|
||||
Py_ssize_t ma_fill; /* # Active + # Dummy */
|
||||
Py_ssize_t ma_used; /* # Active */
|
||||
Py_ssize_t ma_used;
|
||||
PyDictKeysObject *ma_keys;
|
||||
PyObject **ma_values;
|
||||
} PyDictObject;
|
||||
|
||||
/* The table contains ma_mask + 1 slots, and that's a power of 2.
|
||||
* We store the mask instead of the size because the mask is more
|
||||
* frequently needed.
|
||||
*/
|
||||
Py_ssize_t ma_mask;
|
||||
|
||||
/* ma_table points to ma_smalltable for small tables, else to
|
||||
* additional malloc'ed memory. ma_table is never NULL! This rule
|
||||
* saves repeated runtime null-tests in the workhorse getitem and
|
||||
* setitem calls.
|
||||
*/
|
||||
PyDictEntry *ma_table;
|
||||
PyDictEntry *(*ma_lookup)(PyDictObject *mp, PyObject *key, Py_hash_t hash);
|
||||
PyDictEntry ma_smalltable[PyDict_MINSIZE];
|
||||
};
|
||||
#endif /* Py_LIMITED_API */
|
||||
|
||||
PyAPI_DATA(PyTypeObject) PyDict_Type;
|
||||
|
@ -117,6 +59,8 @@ PyAPI_FUNC(void) PyDict_Clear(PyObject *mp);
|
|||
PyAPI_FUNC(int) PyDict_Next(
|
||||
PyObject *mp, Py_ssize_t *pos, PyObject **key, PyObject **value);
|
||||
#ifndef Py_LIMITED_API
|
||||
PyDictKeysObject *_PyDict_NewKeysForClass(void);
|
||||
PyAPI_FUNC(PyObject *) PyObject_GenericGetDict(PyObject *, void *);
|
||||
PyAPI_FUNC(int) _PyDict_Next(
|
||||
PyObject *mp, Py_ssize_t *pos, PyObject **key, PyObject **value, Py_hash_t *hash);
|
||||
#endif
|
||||
|
@ -131,6 +75,7 @@ PyAPI_FUNC(int) _PyDict_Contains(PyObject *mp, PyObject *key, Py_hash_t hash);
|
|||
PyAPI_FUNC(PyObject *) _PyDict_NewPresized(Py_ssize_t minused);
|
||||
PyAPI_FUNC(void) _PyDict_MaybeUntrack(PyObject *mp);
|
||||
PyAPI_FUNC(int) _PyDict_HasOnlyStringKeys(PyObject *mp);
|
||||
#define _PyDict_HasSplitTable(d) ((d)->ma_values != NULL)
|
||||
|
||||
PyAPI_FUNC(int) PyDict_ClearFreeList(void);
|
||||
#endif
|
||||
|
@ -162,6 +107,11 @@ PyAPI_FUNC(int) PyDict_SetItemString(PyObject *dp, const char *key, PyObject *it
|
|||
PyAPI_FUNC(int) _PyDict_SetItemId(PyObject *dp, struct _Py_Identifier *key, PyObject *item);
|
||||
PyAPI_FUNC(int) PyDict_DelItemString(PyObject *dp, const char *key);
|
||||
|
||||
#ifndef Py_LIMITED_API
|
||||
int _PyObjectDict_SetItem(PyTypeObject *tp, PyObject **dictptr, PyObject *name, PyObject *value);
|
||||
PyObject *_PyDict_LoadGlobal(PyDictObject *, PyDictObject *, PyObject *);
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -449,6 +449,7 @@ typedef struct _heaptypeobject {
|
|||
see add_operators() in typeobject.c . */
|
||||
PyBufferProcs as_buffer;
|
||||
PyObject *ht_name, *ht_slots, *ht_qualname;
|
||||
struct _dictkeysobject *ht_cached_keys;
|
||||
/* here are optional user slots, followed by the members. */
|
||||
} PyHeapTypeObject;
|
||||
|
||||
|
@ -517,7 +518,6 @@ PyAPI_FUNC(PyObject *) _PyObject_NextNotImplemented(PyObject *);
|
|||
PyAPI_FUNC(PyObject *) PyObject_GenericGetAttr(PyObject *, PyObject *);
|
||||
PyAPI_FUNC(int) PyObject_GenericSetAttr(PyObject *,
|
||||
PyObject *, PyObject *);
|
||||
PyAPI_FUNC(PyObject *) PyObject_GenericGetDict(PyObject *, void *);
|
||||
PyAPI_FUNC(int) PyObject_GenericSetDict(PyObject *, PyObject *, void *);
|
||||
PyAPI_FUNC(Py_hash_t) PyObject_Hash(PyObject *);
|
||||
PyAPI_FUNC(Py_hash_t) PyObject_HashNotImplemented(PyObject *);
|
||||
|
|
|
@ -321,6 +321,27 @@ class DictTest(unittest.TestCase):
|
|||
self.assertEqual(hashed2.hash_count, 1)
|
||||
self.assertEqual(hashed1.eq_count + hashed2.eq_count, 1)
|
||||
|
||||
def test_setitem_atomic_at_resize(self):
|
||||
class Hashed(object):
|
||||
def __init__(self):
|
||||
self.hash_count = 0
|
||||
self.eq_count = 0
|
||||
def __hash__(self):
|
||||
self.hash_count += 1
|
||||
return 42
|
||||
def __eq__(self, other):
|
||||
self.eq_count += 1
|
||||
return id(self) == id(other)
|
||||
hashed1 = Hashed()
|
||||
# 5 items
|
||||
y = {hashed1: 5, 0: 0, 1: 1, 2: 2, 3: 3}
|
||||
hashed2 = Hashed()
|
||||
# 6th item forces a resize
|
||||
y[hashed2] = []
|
||||
self.assertEqual(hashed1.hash_count, 1)
|
||||
self.assertEqual(hashed2.hash_count, 1)
|
||||
self.assertEqual(hashed1.eq_count + hashed2.eq_count, 1)
|
||||
|
||||
def test_popitem(self):
|
||||
# dict.popitem()
|
||||
for copymode in -1, +1:
|
||||
|
|
|
@ -219,6 +219,8 @@ class QueryTestCase(unittest.TestCase):
|
|||
others.should.not.be: like.this}"""
|
||||
self.assertEqual(DottedPrettyPrinter().pformat(o), exp)
|
||||
|
||||
@unittest.expectedFailure
|
||||
#See http://bugs.python.org/issue13907
|
||||
@test.support.cpython_only
|
||||
def test_set_reprs(self):
|
||||
# This test creates a complex arrangement of frozensets and
|
||||
|
@ -241,10 +243,12 @@ class QueryTestCase(unittest.TestCase):
|
|||
# Consequently, this test is fragile and
|
||||
# implementation-dependent. Small changes to Python's sort
|
||||
# algorithm cause the test to fail when it should pass.
|
||||
# XXX Or changes to the dictionary implementation...
|
||||
|
||||
self.assertEqual(pprint.pformat(set()), 'set()')
|
||||
self.assertEqual(pprint.pformat(set(range(3))), '{0, 1, 2}')
|
||||
self.assertEqual(pprint.pformat(frozenset()), 'frozenset()')
|
||||
|
||||
self.assertEqual(pprint.pformat(frozenset(range(3))), 'frozenset({0, 1, 2})')
|
||||
cube_repr_tgt = """\
|
||||
{frozenset(): frozenset({frozenset({2}), frozenset({0}), frozenset({1})}),
|
||||
|
|
|
@ -687,9 +687,9 @@ class SizeofTest(unittest.TestCase):
|
|||
# method-wrapper (descriptor object)
|
||||
check({}.__iter__, size(h + '2P'))
|
||||
# dict
|
||||
check({}, size(h + '3P2P' + 8*'P2P'))
|
||||
check({}, size(h + '3P' + '4P' + 8*'P2P'))
|
||||
longdict = {1:1, 2:2, 3:3, 4:4, 5:5, 6:6, 7:7, 8:8}
|
||||
check(longdict, size(h + '3P2P' + 8*'P2P') + 16*size('P2P'))
|
||||
check(longdict, size(h + '3P' + '4P') + 16*size('P2P'))
|
||||
# dictionary-keyiterator
|
||||
check({}.keys(), size(h + 'P'))
|
||||
# dictionary-valueiterator
|
||||
|
@ -831,7 +831,7 @@ class SizeofTest(unittest.TestCase):
|
|||
# type
|
||||
# (PyTypeObject + PyNumberMethods + PyMappingMethods +
|
||||
# PySequenceMethods + PyBufferProcs)
|
||||
s = size(vh + 'P2P15Pl4PP9PP11PI') + size('16Pi17P 3P 10P 2P 3P')
|
||||
s = size(vh + 'P2P15Pl4PP9PP11PIP') + size('16Pi17P 3P 10P 2P 3P')
|
||||
check(int, s)
|
||||
# class
|
||||
class newstyleclass(object): pass
|
||||
|
|
|
@ -10,6 +10,10 @@ What's New in Python 3.3.0 Alpha 3?
|
|||
Core and Builtins
|
||||
-----------------
|
||||
|
||||
- Issue #13903: Implement PEP 412. Individual dictionary instances can now share
|
||||
their keys with other dictionaries. Classes take advantage of this to share
|
||||
their instance dictionary keys for improved memory and performance.
|
||||
|
||||
- Issue #14630: Fix a memory access bug for instances of a subclass of int
|
||||
with value 0.
|
||||
|
||||
|
|
|
@ -1,7 +1,6 @@
|
|||
NOTES ON OPTIMIZING DICTIONARIES
|
||||
NOTES ON DICTIONARIES
|
||||
================================
|
||||
|
||||
|
||||
Principal Use Cases for Dictionaries
|
||||
------------------------------------
|
||||
|
||||
|
@ -21,7 +20,7 @@ Instance attribute lookup and Global variables
|
|||
|
||||
Builtins
|
||||
Frequent reads. Almost never written.
|
||||
Size 126 interned strings (as of Py2.3b1).
|
||||
About 150 interned strings (as of Py3.3).
|
||||
A few keys are accessed much more frequently than others.
|
||||
|
||||
Uniquification
|
||||
|
@ -59,44 +58,43 @@ Dynamic Mappings
|
|||
Characterized by deletions interspersed with adds and replacements.
|
||||
Performance benefits greatly from the re-use of dummy entries.
|
||||
|
||||
Data Layout
|
||||
-----------
|
||||
|
||||
Data Layout (assuming a 32-bit box with 64 bytes per cache line)
|
||||
----------------------------------------------------------------
|
||||
|
||||
Smalldicts (8 entries) are attached to the dictobject structure
|
||||
and the whole group nearly fills two consecutive cache lines.
|
||||
|
||||
Larger dicts use the first half of the dictobject structure (one cache
|
||||
line) and a separate, continuous block of entries (at 12 bytes each
|
||||
for a total of 5.333 entries per cache line).
|
||||
Dictionaries are composed of 3 components:
|
||||
The dictobject struct itself
|
||||
A dict-keys object (keys & hashes)
|
||||
A values array
|
||||
|
||||
|
||||
Tunable Dictionary Parameters
|
||||
-----------------------------
|
||||
|
||||
* PyDict_MINSIZE. Currently set to 8.
|
||||
Must be a power of two. New dicts have to zero-out every cell.
|
||||
Each additional 8 consumes 1.5 cache lines. Increasing improves
|
||||
the sparseness of small dictionaries but costs time to read in
|
||||
the additional cache lines if they are not already in cache.
|
||||
That case is common when keyword arguments are passed.
|
||||
* PyDict_STARTSIZE. Starting size of dict (unless an instance dict).
|
||||
Currently set to 8. Must be a power of two.
|
||||
New dicts have to zero-out every cell.
|
||||
Increasing improves the sparseness of small dictionaries but costs
|
||||
time to read in the additional cache lines if they are not already
|
||||
in cache. That case is common when keyword arguments are passed.
|
||||
Prior to version 3.3, PyDict_MINSIZE was used as the starting size
|
||||
of a new dict.
|
||||
|
||||
* Maximum dictionary load in PyDict_SetItem. Currently set to 2/3.
|
||||
Increasing this ratio makes dictionaries more dense resulting
|
||||
in more collisions. Decreasing it improves sparseness at the
|
||||
expense of spreading entries over more cache lines and at the
|
||||
* PyDict_MINSIZE. Minimum size of a dict.
|
||||
Currently set to 4 (to keep instance dicts small).
|
||||
Must be a power of two. Prior to version 3.3, PyDict_MINSIZE was
|
||||
set to 8.
|
||||
|
||||
* USABLE_FRACTION. Maximum dictionary load in PyDict_SetItem.
|
||||
Currently set to 2/3. Increasing this ratio makes dictionaries more
|
||||
dense resulting in more collisions. Decreasing it improves sparseness
|
||||
at the expense of spreading entries over more cache lines and at the
|
||||
cost of total memory consumed.
|
||||
|
||||
The load test occurs in highly time sensitive code. Efforts
|
||||
to make the test more complex (for example, varying the load
|
||||
for different sizes) have degraded performance.
|
||||
|
||||
* Growth rate upon hitting maximum load. Currently set to *2.
|
||||
Raising this to *4 results in half the number of resizes,
|
||||
less effort to resize, better sparseness for some (but not
|
||||
all dict sizes), and potentially doubles memory consumption
|
||||
depending on the size of the dictionary. Setting to *4
|
||||
eliminates every other resize step.
|
||||
Raising this to *4 results in half the number of resizes, less
|
||||
effort to resize, better sparseness for some (but not all dict sizes),
|
||||
and potentially doubles memory consumption depending on the size of
|
||||
the dictionary. Setting to *4 eliminates every other resize step.
|
||||
|
||||
* Maximum sparseness (minimum dictionary load). What percentage
|
||||
of entries can be unused before the dictionary shrinks to
|
||||
|
@ -126,8 +124,8 @@ __iter__(), iterkeys(), iteritems(), itervalues(), and update().
|
|||
Also, every dictionary iterates at least twice, once for the memset()
|
||||
when it is created and once by dealloc().
|
||||
|
||||
Dictionary operations involving only a single key can be O(1) unless
|
||||
resizing is possible. By checking for a resize only when the
|
||||
Dictionary operations involving only a single key can be O(1) unless
|
||||
resizing is possible. By checking for a resize only when the
|
||||
dictionary can grow (and may *require* resizing), other operations
|
||||
remain O(1), and the odds of resize thrashing or memory fragmentation
|
||||
are reduced. In particular, an algorithm that empties a dictionary
|
||||
|
@ -135,136 +133,51 @@ by repeatedly invoking .pop will see no resizing, which might
|
|||
not be necessary at all because the dictionary is eventually
|
||||
discarded entirely.
|
||||
|
||||
The key differences between this implementation and earlier versions are:
|
||||
1. The table can be split into two parts, the keys and the values.
|
||||
|
||||
2. There is an additional key-value combination: (key, NULL).
|
||||
Unlike (<dummy>, NULL) which represents a deleted value, (key, NULL)
|
||||
represents a value yet to be inserted. This combination can only occur
|
||||
when the table is split.
|
||||
|
||||
3. No small table embedded in the dict,
|
||||
as this would make sharing of key-tables impossible.
|
||||
|
||||
|
||||
These changes have the following consequences.
|
||||
1. General dictionaries are slightly larger.
|
||||
|
||||
2. All object dictionaries of a single class can share a single key-table,
|
||||
saving about 60% memory for such cases.
|
||||
|
||||
Results of Cache Locality Experiments
|
||||
-------------------------------------
|
||||
--------------------------------------
|
||||
|
||||
When an entry is retrieved from memory, 4.333 adjacent entries are also
|
||||
retrieved into a cache line. Since accessing items in cache is *much*
|
||||
cheaper than a cache miss, an enticing idea is to probe the adjacent
|
||||
entries as a first step in collision resolution. Unfortunately, the
|
||||
introduction of any regularity into collision searches results in more
|
||||
collisions than the current random chaining approach.
|
||||
Experiments on an earlier design of dictionary, in which all tables were
|
||||
combined, showed the following:
|
||||
|
||||
Exploiting cache locality at the expense of additional collisions fails
|
||||
to pay off when the entries are already loaded in cache (the expense
|
||||
is paid with no compensating benefit). This occurs in small dictionaries
|
||||
where the whole dictionary fits into a pair of cache lines. It also
|
||||
occurs frequently in large dictionaries which have a common access pattern
|
||||
where some keys are accessed much more frequently than others. The
|
||||
more popular entries *and* their collision chains tend to remain in cache.
|
||||
When an entry is retrieved from memory, several adjacent entries are also
|
||||
retrieved into a cache line. Since accessing items in cache is *much*
|
||||
cheaper than a cache miss, an enticing idea is to probe the adjacent
|
||||
entries as a first step in collision resolution. Unfortunately, the
|
||||
introduction of any regularity into collision searches results in more
|
||||
collisions than the current random chaining approach.
|
||||
|
||||
To exploit cache locality, change the collision resolution section
|
||||
in lookdict() and lookdict_string(). Set i^=1 at the top of the
|
||||
loop and move the i = (i << 2) + i + perturb + 1 to an unrolled
|
||||
version of the loop.
|
||||
Exploiting cache locality at the expense of additional collisions fails
|
||||
to pay off when the entries are already loaded in cache (the expense
|
||||
is paid with no compensating benefit). This occurs in small dictionaries
|
||||
where the whole dictionary fits into a pair of cache lines. It also
|
||||
occurs frequently in large dictionaries which have a common access pattern
|
||||
where some keys are accessed much more frequently than others. The
|
||||
more popular entries *and* their collision chains tend to remain in cache.
|
||||
|
||||
This optimization strategy can be leveraged in several ways:
|
||||
To exploit cache locality, change the collision resolution section
|
||||
in lookdict() and lookdict_string(). Set i^=1 at the top of the
|
||||
loop and move the i = (i << 2) + i + perturb + 1 to an unrolled
|
||||
version of the loop.
|
||||
|
||||
* If the dictionary is kept sparse (through the tunable parameters),
|
||||
then the occurrence of additional collisions is lessened.
|
||||
|
||||
* If lookdict() and lookdict_string() are specialized for small dicts
|
||||
and for large dicts, then the versions for large dicts can be given
|
||||
an alternate search strategy without increasing collisions in small dicts
|
||||
which already have the maximum benefit of cache locality.
|
||||
|
||||
* If the use case for a dictionary is known to have a random key
|
||||
access pattern (as opposed to a more common pattern with a Zipf's law
|
||||
distribution), then there will be more benefit for large dictionaries
|
||||
because any given key is no more likely than another to already be
|
||||
in cache.
|
||||
|
||||
* In use cases with paired accesses to the same key, the second access
|
||||
is always in cache and gets no benefit from efforts to further improve
|
||||
cache locality.
|
||||
|
||||
Optimizing the Search of Small Dictionaries
|
||||
-------------------------------------------
|
||||
|
||||
If lookdict() and lookdict_string() are specialized for smaller dictionaries,
|
||||
then a custom search approach can be implemented that exploits the small
|
||||
search space and cache locality.
|
||||
|
||||
* The simplest example is a linear search of contiguous entries. This is
|
||||
simple to implement, guaranteed to terminate rapidly, never searches
|
||||
the same entry twice, and precludes the need to check for dummy entries.
|
||||
|
||||
* A more advanced example is a self-organizing search so that the most
|
||||
frequently accessed entries get probed first. The organization
|
||||
adapts if the access pattern changes over time. Treaps are ideally
|
||||
suited for self-organization with the most common entries at the
|
||||
top of the heap and a rapid binary search pattern. Most probes and
|
||||
results are all located at the top of the tree allowing them all to
|
||||
be located in one or two cache lines.
|
||||
|
||||
* Also, small dictionaries may be made more dense, perhaps filling all
|
||||
eight cells to take the maximum advantage of two cache lines.
|
||||
For split tables, the above will apply to the keys, but the value will
|
||||
always be in a different cache line from the key.
|
||||
|
||||
|
||||
Strategy Pattern
|
||||
----------------
|
||||
|
||||
Consider allowing the user to set the tunable parameters or to select a
|
||||
particular search method. Since some dictionary use cases have known
|
||||
sizes and access patterns, the user may be able to provide useful hints.
|
||||
|
||||
1) For example, if membership testing or lookups dominate runtime and memory
|
||||
is not at a premium, the user may benefit from setting the maximum load
|
||||
ratio at 5% or 10% instead of the usual 66.7%. This will sharply
|
||||
curtail the number of collisions but will increase iteration time.
|
||||
The builtin namespace is a prime example of a dictionary that can
|
||||
benefit from being highly sparse.
|
||||
|
||||
2) Dictionary creation time can be shortened in cases where the ultimate
|
||||
size of the dictionary is known in advance. The dictionary can be
|
||||
pre-sized so that no resize operations are required during creation.
|
||||
Not only does this save resizes, but the key insertion will go
|
||||
more quickly because the first half of the keys will be inserted into
|
||||
a more sparse environment than before. The preconditions for this
|
||||
strategy arise whenever a dictionary is created from a key or item
|
||||
sequence and the number of *unique* keys is known.
|
||||
|
||||
3) If the key space is large and the access pattern is known to be random,
|
||||
then search strategies exploiting cache locality can be fruitful.
|
||||
The preconditions for this strategy arise in simulations and
|
||||
numerical analysis.
|
||||
|
||||
4) If the keys are fixed and the access pattern strongly favors some of
|
||||
the keys, then the entries can be stored contiguously and accessed
|
||||
with a linear search or treap. This exploits knowledge of the data,
|
||||
cache locality, and a simplified search routine. It also eliminates
|
||||
the need to test for dummy entries on each probe. The preconditions
|
||||
for this strategy arise in symbol tables and in the builtin dictionary.
|
||||
|
||||
|
||||
Readonly Dictionaries
|
||||
---------------------
|
||||
Some dictionary use cases pass through a build stage and then move to a
|
||||
more heavily exercised lookup stage with no further changes to the
|
||||
dictionary.
|
||||
|
||||
An idea that emerged on python-dev is to be able to convert a dictionary
|
||||
to a read-only state. This can help prevent programming errors and also
|
||||
provide knowledge that can be exploited for lookup optimization.
|
||||
|
||||
The dictionary can be immediately rebuilt (eliminating dummy entries),
|
||||
resized (to an appropriate level of sparseness), and the keys can be
|
||||
jostled (to minimize collisions). The lookdict() routine can then
|
||||
eliminate the test for dummy entries (saving about 1/4 of the time
|
||||
spent in the collision resolution loop).
|
||||
|
||||
An additional possibility is to insert links into the empty spaces
|
||||
so that dictionary iteration can proceed in len(d) steps instead of
|
||||
(mp->mask + 1) steps. Alternatively, a separate tuple of keys can be
|
||||
kept just for iteration.
|
||||
|
||||
|
||||
Caching Lookups
|
||||
---------------
|
||||
The idea is to exploit key access patterns by anticipating future lookups
|
||||
based on previous lookups.
|
||||
|
||||
The simplest incarnation is to save the most recently accessed entry.
|
||||
This gives optimal performance for use cases where every get is followed
|
||||
by a set or del to the same key.
|
||||
|
|
1773
Objects/dictobject.c
1773
Objects/dictobject.c
File diff suppressed because it is too large
Load Diff
|
@ -1188,13 +1188,10 @@ _PyObject_GenericSetAttrWithDict(PyObject *obj, PyObject *name,
|
|||
if (dict == NULL) {
|
||||
dictptr = _PyObject_GetDictPtr(obj);
|
||||
if (dictptr != NULL) {
|
||||
dict = *dictptr;
|
||||
if (dict == NULL && value != NULL) {
|
||||
dict = PyDict_New();
|
||||
if (dict == NULL)
|
||||
goto done;
|
||||
*dictptr = dict;
|
||||
}
|
||||
res = _PyObjectDict_SetItem(Py_TYPE(obj), dictptr, name, value);
|
||||
if (res < 0 && PyErr_ExceptionMatches(PyExc_KeyError))
|
||||
PyErr_SetObject(PyExc_AttributeError, name);
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
if (dict != NULL) {
|
||||
|
@ -1236,22 +1233,6 @@ PyObject_GenericSetAttr(PyObject *obj, PyObject *name, PyObject *value)
|
|||
return _PyObject_GenericSetAttrWithDict(obj, name, value, NULL);
|
||||
}
|
||||
|
||||
PyObject *
|
||||
PyObject_GenericGetDict(PyObject *obj, void *context)
|
||||
{
|
||||
PyObject *dict, **dictptr = _PyObject_GetDictPtr(obj);
|
||||
if (dictptr == NULL) {
|
||||
PyErr_SetString(PyExc_AttributeError,
|
||||
"This object has no __dict__");
|
||||
return NULL;
|
||||
}
|
||||
dict = *dictptr;
|
||||
if (dict == NULL)
|
||||
*dictptr = dict = PyDict_New();
|
||||
Py_XINCREF(dict);
|
||||
return dict;
|
||||
}
|
||||
|
||||
int
|
||||
PyObject_GenericSetDict(PyObject *obj, PyObject *value, void *context)
|
||||
{
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
MCACHE_MAX_ATTR_SIZE, since it might be a problem if very large
|
||||
strings are used as attribute names. */
|
||||
#define MCACHE_MAX_ATTR_SIZE 100
|
||||
#define MCACHE_SIZE_EXP 10
|
||||
#define MCACHE_SIZE_EXP 9
|
||||
#define MCACHE_HASH(version, name_hash) \
|
||||
(((unsigned int)(version) * (unsigned int)(name_hash)) \
|
||||
>> (8*sizeof(unsigned int) - MCACHE_SIZE_EXP))
|
||||
|
@ -2306,6 +2306,9 @@ type_new(PyTypeObject *metatype, PyObject *args, PyObject *kwds)
|
|||
type->tp_dictoffset = slotoffset;
|
||||
slotoffset += sizeof(PyObject *);
|
||||
}
|
||||
if (type->tp_dictoffset) {
|
||||
et->ht_cached_keys = _PyDict_NewKeysForClass();
|
||||
}
|
||||
if (add_weak) {
|
||||
assert(!base->tp_itemsize);
|
||||
type->tp_weaklistoffset = slotoffset;
|
||||
|
@ -2411,6 +2414,9 @@ PyType_FromSpec(PyType_Spec *spec)
|
|||
res->ht_type.tp_doc = tp_doc;
|
||||
}
|
||||
}
|
||||
if (res->ht_type.tp_dictoffset) {
|
||||
res->ht_cached_keys = _PyDict_NewKeysForClass();
|
||||
}
|
||||
|
||||
if (PyType_Ready(&res->ht_type) < 0)
|
||||
goto fail;
|
||||
|
@ -2767,9 +2773,13 @@ type_traverse(PyTypeObject *type, visitproc visit, void *arg)
|
|||
return 0;
|
||||
}
|
||||
|
||||
extern void
|
||||
_PyDictKeys_DecRef(PyDictKeysObject *keys);
|
||||
|
||||
static int
|
||||
type_clear(PyTypeObject *type)
|
||||
{
|
||||
PyDictKeysObject *cached_keys;
|
||||
/* Because of type_is_gc(), the collector only calls this
|
||||
for heaptypes. */
|
||||
assert(type->tp_flags & Py_TPFLAGS_HEAPTYPE);
|
||||
|
@ -2801,6 +2811,11 @@ type_clear(PyTypeObject *type)
|
|||
*/
|
||||
|
||||
PyType_Modified(type);
|
||||
cached_keys = ((PyHeapTypeObject *)type)->ht_cached_keys;
|
||||
if (cached_keys != NULL) {
|
||||
((PyHeapTypeObject *)type)->ht_cached_keys = NULL;
|
||||
_PyDictKeys_DecRef(cached_keys);
|
||||
}
|
||||
if (type->tp_dict)
|
||||
PyDict_Clear(type->tp_dict);
|
||||
Py_CLEAR(type->tp_mro);
|
||||
|
|
|
@ -2123,70 +2123,31 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
|
|||
w = GETITEM(names, oparg);
|
||||
if (PyDict_CheckExact(f->f_globals)
|
||||
&& PyDict_CheckExact(f->f_builtins)) {
|
||||
if (PyUnicode_CheckExact(w)) {
|
||||
/* Inline the PyDict_GetItem() calls.
|
||||
WARNING: this is an extreme speed hack.
|
||||
Do not try this at home. */
|
||||
Py_hash_t hash = ((PyASCIIObject *)w)->hash;
|
||||
if (hash != -1) {
|
||||
PyDictObject *d;
|
||||
PyDictEntry *e;
|
||||
d = (PyDictObject *)(f->f_globals);
|
||||
e = d->ma_lookup(d, w, hash);
|
||||
if (e == NULL) {
|
||||
x = NULL;
|
||||
break;
|
||||
}
|
||||
x = e->me_value;
|
||||
if (x != NULL) {
|
||||
Py_INCREF(x);
|
||||
PUSH(x);
|
||||
DISPATCH();
|
||||
}
|
||||
d = (PyDictObject *)(f->f_builtins);
|
||||
e = d->ma_lookup(d, w, hash);
|
||||
if (e == NULL) {
|
||||
x = NULL;
|
||||
break;
|
||||
}
|
||||
x = e->me_value;
|
||||
if (x != NULL) {
|
||||
Py_INCREF(x);
|
||||
PUSH(x);
|
||||
DISPATCH();
|
||||
}
|
||||
goto load_global_error;
|
||||
}
|
||||
}
|
||||
/* This is the un-inlined version of the code above */
|
||||
x = PyDict_GetItem(f->f_globals, w);
|
||||
x = _PyDict_LoadGlobal((PyDictObject *)f->f_globals,
|
||||
(PyDictObject *)f->f_builtins,
|
||||
w);
|
||||
if (x == NULL) {
|
||||
x = PyDict_GetItem(f->f_builtins, w);
|
||||
if (x == NULL) {
|
||||
load_global_error:
|
||||
format_exc_check_arg(
|
||||
PyExc_NameError,
|
||||
GLOBAL_NAME_ERROR_MSG, w);
|
||||
break;
|
||||
}
|
||||
}
|
||||
Py_INCREF(x);
|
||||
PUSH(x);
|
||||
DISPATCH();
|
||||
}
|
||||
|
||||
/* Slow-path if globals or builtins is not a dict */
|
||||
x = PyObject_GetItem(f->f_globals, w);
|
||||
if (x == NULL) {
|
||||
x = PyObject_GetItem(f->f_builtins, w);
|
||||
if (x == NULL) {
|
||||
if (PyErr_ExceptionMatches(PyExc_KeyError))
|
||||
format_exc_check_arg(
|
||||
PyExc_NameError,
|
||||
GLOBAL_NAME_ERROR_MSG, w);
|
||||
if (!PyErr_Occurred())
|
||||
format_exc_check_arg(PyExc_NameError,
|
||||
GLOBAL_NAME_ERROR_MSG, w);
|
||||
break;
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* Slow-path if globals or builtins is not a dict */
|
||||
x = PyObject_GetItem(f->f_globals, w);
|
||||
if (x == NULL) {
|
||||
x = PyObject_GetItem(f->f_builtins, w);
|
||||
if (x == NULL) {
|
||||
if (PyErr_ExceptionMatches(PyExc_KeyError))
|
||||
format_exc_check_arg(
|
||||
PyExc_NameError,
|
||||
GLOBAL_NAME_ERROR_MSG, w);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
Py_INCREF(x);
|
||||
PUSH(x);
|
||||
DISPATCH();
|
||||
|
||||
|
|
|
@ -634,9 +634,14 @@ class PyDictObjectPtr(PyObjectPtr):
|
|||
Yields a sequence of (PyObjectPtr key, PyObjectPtr value) pairs,
|
||||
analogous to dict.iteritems()
|
||||
'''
|
||||
for i in safe_range(self.field('ma_mask') + 1):
|
||||
ep = self.field('ma_table') + i
|
||||
pyop_value = PyObjectPtr.from_pyobject_ptr(ep['me_value'])
|
||||
keys = self.field('ma_keys')
|
||||
values = self.field('ma_values')
|
||||
for i in safe_range(keys['dk_size']):
|
||||
ep = keys['dk_entries'].address + i
|
||||
if long(values):
|
||||
pyop_value = PyObjectPtr.from_pyobject_ptr(values[i])
|
||||
else:
|
||||
pyop_value = PyObjectPtr.from_pyobject_ptr(ep['me_value'])
|
||||
if not pyop_value.is_null():
|
||||
pyop_key = PyObjectPtr.from_pyobject_ptr(ep['me_key'])
|
||||
yield (pyop_key, pyop_value)
|
||||
|
|
Loading…
Reference in New Issue