SF patch #425242: Patch which "inlines" small dictionaries.

The idea is Marc-Andre Lemburg's, the implementation is Tim's.
Add a new ma_smalltable member to dictobjects, an embedded vector of
MINSIZE (8) dictentry structs.  Short course is that this lets us avoid
additional malloc(s) for dicts with no more than 5 entries.

The changes are widespread but mostly small.

Long course:  WRT speed, all scalar operations (getitem, setitem, delitem)
on non-empty dicts benefit from no longer needing NULL-pointer checks
(ma_table is never NULL anymore).  Bulk operations (copy, update, resize,
clearing slots during dealloc) benefit in some cases from now looping
on the ma_fill count rather than on ma_size, but that was an unexpected
benefit:  the original reason to loop on ma_fill was to let bulk
operations on empty dicts end quickly (since the NULL-pointer checks
went away, empty dicts aren't special-cased any more).

Special considerations:

For dicts that remain empty, this change is a lose on two counts:
the dict object contains 8 new dictentry slots now that weren't
needed before, and dict object creation also spends time memset'ing
these doomed-to-be-unsused slots to NULLs.

For dicts with one or two entries that never get larger than 2, it's
a mix:  a malloc()/free() pair is no longer needed, and the 2-entry case
gets to use 8 slots (instead of 4) thus decreasing the chance of
collision.  Against that, dict object creation spends time memset'ing
4 slots that aren't strictly needed in this case.

For dicts with 3 through 5 entries that never get larger than 5, it's a
pure win:  the dict is created with all the space they need, and they
never need to resize.  Before they suffered two malloc()/free() calls,
plus 1 dict resize, to get enough space.  In addition, the 8-slot
table they ended with consumed more memory overall, because of the
hidden overhead due to the additional malloc.

For dicts with 6 or more entries, the ma_smalltable member is wasted
space, but then these are large(r) dicts so 8 slots more or less doesn't
make much difference.  They still benefit all the time from removing
ubiquitous dynamic null-pointer checks, and get a small benefit (but
relatively smaller the larger the dict) from not having to do two
mallocs, two frees, and a resize on the way *to* getting their sixth
entry.

All in all it appears a small but definite general win, with larger
benefits in specific cases.  It's especially nice that it allowed to
get rid of several branches, gotos and labels, and overall made the
code smaller.
This commit is contained in:
Tim Peters 2001-05-22 20:40:22 +00:00
parent 5379d05dc3
commit dea48ec581
1 changed files with 146 additions and 82 deletions

View File

@ -5,10 +5,12 @@
/*
* MINSIZE is the minimum size of a dictionary.
* MINSIZE is the minimum size of a dictionary. This many slots are
* allocated directly in the dict object (in the ma_smalltable member).
* This must be a power of 2, and the first entry in the polys[] vector must
* match.
*/
#define MINSIZE 4
#define MINSIZE 8
/* define this out if you don't want conversion statistics on exit */
#undef SHOW_CONVERSION_COUNTS
@ -16,10 +18,24 @@
/*
Table of irreducible polynomials to efficiently cycle through
GF(2^n)-{0}, 2<=n<=30. A table size is always a power of 2.
For a table size of 2**i, the polys entry is 2**i + j for some j in 1 thru
2**i-1 inclusive. The polys[] entries here happen to add in the smallest j
values "that work". Work means this: given any integer k in 1 thru 2**i-1
inclusive, a poly works if & only if repeating this code:
print k
k <<= 1
if k >= 2**i:
k ^= poly
prints every integer in 1 thru 2**i-1 inclusive exactly once before printing
k a second time. Theory can be used to find such polys efficiently, but the
operational defn. of "works" is sufficient to find them in reasonable time
via brute force program (hint: any poly that has an even number of 1 bits
cannot work; ditto any poly with low bit 0; exploit those).
*/
static long polys[] = {
4 + 3,
8 + 3,
/* 4 + 3, */ /* first active entry if MINSIZE == 4 */
8 + 3, /* first active entry if MINSIZE == 8 */
16 + 3,
32 + 5,
64 + 3,
@ -46,7 +62,8 @@ static long polys[] = {
134217728 + 39,
268435456 + 9,
536870912 + 5,
1073741824 + 83,
1073741824 + 83
/* 2147483648 + 9 -- if we ever boost this to unsigned long */
};
/* Object used as dummy key to fill deleted entries */
@ -100,8 +117,14 @@ struct dictobject {
int ma_used; /* # Active */
int ma_size; /* total # slots in ma_table */
int ma_poly; /* appopriate entry from polys vector */
/* ma_table points to ma_smalltable for small tables, else to
* additional malloc'ed memory. ma_table is never NULL! This rule
* saves repeated runtime null-tests in the workhorse getitem and
* setitem calls.
*/
dictentry *ma_table;
dictentry *(*ma_lookup)(dictobject *mp, PyObject *key, long hash);
dictentry ma_smalltable[MINSIZE];
};
/* forward declarations */
@ -121,6 +144,16 @@ show_counts(void)
}
#endif
/* Set dictobject* mp to empty but w/ MINSIZE slots, using ma_smalltable. */
#define empty_to_minsize(mp) do { \
memset((mp)->ma_smalltable, 0, sizeof((mp)->ma_smalltable)); \
(mp)->ma_table = (mp)->ma_smalltable; \
(mp)->ma_size = MINSIZE; \
(mp)->ma_used = (mp)->ma_fill = 0; \
(mp)->ma_poly = polys[0]; \
assert(MINSIZE < (mp)->ma_poly && (mp)->ma_poly < MINSIZE*2); \
} while(0)
PyObject *
PyDict_New(void)
{
@ -136,11 +169,7 @@ PyDict_New(void)
mp = PyObject_NEW(dictobject, &PyDict_Type);
if (mp == NULL)
return NULL;
mp->ma_size = 0;
mp->ma_poly = 0;
mp->ma_table = NULL;
mp->ma_fill = 0;
mp->ma_used = 0;
empty_to_minsize(mp);
mp->ma_lookup = lookdict_string;
#ifdef SHOW_CONVERSION_COUNTS
++created;
@ -320,7 +349,7 @@ lookdict_string(dictobject *mp, PyObject *key, register long hash)
&& ep->me_key != dummy
&& compare(ep->me_key, key) == 0))
return ep;
else if (ep->me_key == dummy && freeslot == NULL)
if (ep->me_key == dummy && freeslot == NULL)
freeslot = ep;
/* Cycle through GF(2^n)-{0} */
incr <<= 1;
@ -374,43 +403,60 @@ dictresize(dictobject *mp, int minused)
register int i;
assert(minused >= 0);
for (i = 0, newsize = MINSIZE; ; i++, newsize <<= 1) {
if (i >= sizeof(polys)/sizeof(polys[0])) {
/* Ran out of polynomials */
PyErr_NoMemory();
return -1;
}
assert(oldtable != NULL);
newpoly = 0;
newsize = MINSIZE;
for (i = 0; i < sizeof(polys)/sizeof(polys[0]); ++i) {
if (newsize > minused) {
newpoly = polys[i];
break;
}
newsize <<= 1;
if (newsize < 0) /* overflow */
break;
}
newtable = PyMem_NEW(dictentry, newsize);
if (newtable == NULL) {
if (newpoly == 0) {
/* Ran out of polynomials or newsize overflowed. */
PyErr_NoMemory();
return -1;
}
memset(newtable, '\0', sizeof(dictentry) * newsize);
mp->ma_size = newsize;
mp->ma_poly = newpoly;
if (newsize == MINSIZE) {
newtable = mp->ma_smalltable;
if (newtable == oldtable)
return 0;
}
else {
newtable = PyMem_NEW(dictentry, newsize);
if (newtable == NULL) {
PyErr_NoMemory();
return -1;
}
}
assert(newtable != oldtable);
mp->ma_table = newtable;
mp->ma_fill = 0;
mp->ma_size = newsize;
memset(newtable, 0, sizeof(dictentry) * newsize);
mp->ma_poly = newpoly;
mp->ma_used = 0;
i = mp->ma_fill;
mp->ma_fill = 0;
/* Copy the data over; this is refcount-neutral for active entries;
dummy entries aren't copied over, of course */
for (i = 0, ep = oldtable; i < oldsize; i++, ep++) {
if (ep->me_value != NULL) /* active entry */
for (ep = oldtable; i > 0; ep++) {
if (ep->me_value != NULL) { /* active entry */
--i;
insertdict(mp, ep->me_key, ep->me_hash, ep->me_value);
}
else if (ep->me_key != NULL) { /* dummy entry */
--i;
assert(ep->me_key == dummy);
Py_DECREF(ep->me_key);
}
/* else key == value == NULL: nothing to do */
}
if (oldtable != NULL)
if (oldtable != mp->ma_smalltable)
PyMem_DEL(oldtable);
return 0;
}
@ -423,8 +469,6 @@ PyDict_GetItem(PyObject *op, PyObject *key)
if (!PyDict_Check(op)) {
return NULL;
}
if (mp->ma_table == NULL)
return NULL;
#ifdef CACHE_HASH
if (!PyString_Check(key) ||
(hash = ((PyStringObject *) key)->ob_shash) == -1)
@ -479,16 +523,7 @@ PyDict_SetItem(register PyObject *op, PyObject *key, PyObject *value)
if (hash == -1)
return -1;
}
if (mp->ma_fill >= mp->ma_size) {
/* No room for a new key.
* This only happens when the dict is empty.
* Let dictresize() create a minimal dict.
*/
assert(mp->ma_used == 0);
if (dictresize(mp, 0) != 0)
return -1;
assert(mp->ma_fill < mp->ma_size);
}
assert(mp->ma_fill < mp->ma_size);
n_used = mp->ma_used;
Py_INCREF(value);
Py_INCREF(key);
@ -528,11 +563,8 @@ PyDict_DelItem(PyObject *op, PyObject *key)
return -1;
}
mp = (dictobject *)op;
if (((dictobject *)op)->ma_table == NULL)
goto empty;
ep = (mp->ma_lookup)(mp, key, hash);
if (ep->me_value == NULL) {
empty:
PyErr_SetObject(PyExc_KeyError, key);
return -1;
}
@ -550,23 +582,70 @@ PyDict_DelItem(PyObject *op, PyObject *key)
void
PyDict_Clear(PyObject *op)
{
int i, n;
register dictentry *table;
dictobject *mp;
dictentry *ep, *table;
int table_is_malloced;
int fill;
dictentry small_copy[MINSIZE];
#ifdef Py_DEBUG
int i, n;
#endif
if (!PyDict_Check(op))
return;
mp = (dictobject *)op;
table = mp->ma_table;
if (table == NULL)
return;
#ifdef Py_DEBUG
n = mp->ma_size;
mp->ma_size = mp->ma_used = mp->ma_fill = 0;
mp->ma_table = NULL;
for (i = 0; i < n; i++) {
Py_XDECREF(table[i].me_key);
Py_XDECREF(table[i].me_value);
i = 0;
#endif
table = mp->ma_table;
assert(table != NULL);
table_is_malloced = table != mp->ma_smalltable;
/* This is delicate. During the process of clearing the dict,
* decrefs can cause the dict to mutate. To avoid fatal confusion
* (voice of experience), we have to make the dict empty before
* clearing the slots, and never refer to anything via mp->xxx while
* clearing.
*/
fill = mp->ma_fill;
if (table_is_malloced)
empty_to_minsize(mp);
else if (fill > 0) {
/* It's a small table with something that needs to be cleared.
* Afraid the only safe way is to copy the dict entries into
* another small table first.
*/
memcpy(small_copy, table, sizeof(small_copy));
table = small_copy;
empty_to_minsize(mp);
}
PyMem_DEL(table);
/* else it's a small table that's already empty */
/* Now we can finally clear things. If C had refcounts, we could
* assert that the refcount on table is 1 now, i.e. that this function
* has unique access to it, so decref side-effects can't alter it.
*/
for (ep = table; fill > 0; ++ep) {
#ifdef Py_DEBUG
assert(i < n);
++i;
#endif
if (ep->me_key) {
--fill;
Py_DECREF(ep->me_key);
Py_XDECREF(ep->me_value);
}
#ifdef Py_DEBUG
else
assert(ep->me_value == NULL);
#endif
}
if (table_is_malloced)
PyMem_DEL(table);
}
/* CAUTION: In general, it isn't safe to use PyDict_Next in a loop that
@ -602,19 +681,18 @@ PyDict_Next(PyObject *op, int *ppos, PyObject **pkey, PyObject **pvalue)
static void
dict_dealloc(register dictobject *mp)
{
register int i;
register dictentry *ep;
int fill = mp->ma_fill;
Py_TRASHCAN_SAFE_BEGIN(mp)
PyObject_GC_Fini(mp);
for (i = 0, ep = mp->ma_table; i < mp->ma_size; i++, ep++) {
if (ep->me_key != NULL) {
for (ep = mp->ma_table; fill > 0; ep++) {
if (ep->me_key) {
--fill;
Py_DECREF(ep->me_key);
}
if (ep->me_value != NULL) {
Py_DECREF(ep->me_value);
Py_XDECREF(ep->me_value);
}
}
if (mp->ma_table != NULL)
if (mp->ma_table != mp->ma_smalltable)
PyMem_DEL(mp->ma_table);
mp = (dictobject *) PyObject_AS_GC(mp);
PyObject_DEL(mp);
@ -705,10 +783,7 @@ dict_subscript(dictobject *mp, register PyObject *key)
{
PyObject *v;
long hash;
if (mp->ma_table == NULL) {
PyErr_SetObject(PyExc_KeyError, key);
return NULL;
}
assert(mp->ma_table != NULL);
#ifdef CACHE_HASH
if (!PyString_Check(key) ||
(hash = ((PyStringObject *) key)->ob_shash) == -1)
@ -1168,8 +1243,7 @@ dict_has_key(register dictobject *mp, PyObject *args)
if (hash == -1)
return NULL;
}
ok = (mp->ma_size != 0
&& (mp->ma_lookup)(mp, key, hash)->me_value != NULL);
ok = (mp->ma_lookup)(mp, key, hash)->me_value != NULL;
return PyInt_FromLong(ok);
}
@ -1183,8 +1257,6 @@ dict_get(register dictobject *mp, PyObject *args)
if (!PyArg_ParseTuple(args, "O|O:get", &key, &failobj))
return NULL;
if (mp->ma_table == NULL)
goto finally;
#ifdef CACHE_HASH
if (!PyString_Check(key) ||
@ -1197,7 +1269,6 @@ dict_get(register dictobject *mp, PyObject *args)
}
val = (mp->ma_lookup)(mp, key, hash)->me_value;
finally:
if (val == NULL)
val = failobj;
Py_INCREF(val);
@ -1215,8 +1286,6 @@ dict_setdefault(register dictobject *mp, PyObject *args)
if (!PyArg_ParseTuple(args, "O|O:setdefault", &key, &failobj))
return NULL;
if (mp->ma_table == NULL)
goto finally;
#ifdef CACHE_HASH
if (!PyString_Check(key) ||
@ -1228,8 +1297,6 @@ dict_setdefault(register dictobject *mp, PyObject *args)
return NULL;
}
val = (mp->ma_lookup)(mp, key, hash)->me_value;
finally:
if (val == NULL) {
val = failobj;
if (PyDict_SetItem((PyObject*)mp, key, failobj))
@ -1283,12 +1350,10 @@ dict_popitem(dictobject *mp, PyObject *args)
ep = &mp->ma_table[0];
if (ep->me_value == NULL) {
i = (int)ep->me_hash;
/* The hash field may be uninitialized trash, or it
* may be a real hash value, or it may be a legit
* search finger, or it may be a once-legit search
* finger that's out of bounds now because it
* wrapped around or the table shrunk -- simply
* make sure it's in bounds now.
/* The hash field may be a real hash value, or it may be a
* legit search finger, or it may be a once-legit search
* finger that's out of bounds now because it wrapped around
* or the table shrunk -- simply make sure it's in bounds now.
*/
if (i >= mp->ma_size || i < 1)
i = 1; /* skip slot 0 */
@ -1480,8 +1545,7 @@ dict_contains(dictobject *mp, PyObject *key)
if (hash == -1)
return -1;
}
return (mp->ma_size != 0
&& (mp->ma_lookup)(mp, key, hash)->me_value != NULL);
return (mp->ma_lookup)(mp, key, hash)->me_value != NULL;
}
/* Hack to implement "key in dict" */