Revised the set() and frozenset() implementation to use its own internal

data structure instead of using dictionaries.  Reduces memory consumption
by 1/3 and provides modest speed-ups for most set operations.
This commit is contained in:
Raymond Hettinger 2005-07-31 01:16:36 +00:00
parent fe25643192
commit 9f1a6796eb
3 changed files with 917 additions and 229 deletions

View File

@ -1,4 +1,3 @@
/* Set object interface */ /* Set object interface */
#ifndef Py_SETOBJECT_H #ifndef Py_SETOBJECT_H
@ -7,28 +6,61 @@
extern "C" { extern "C" {
#endif #endif
/*
There are three kinds of slots in the table:
1. Unused: key == NULL
2. Active: key != NULL and key != dummy
3. Dummy: key == dummy
*/
/* Number of entries in the smalltable array embedded in every set object. */
#define PySet_MINSIZE 8
/* One slot of a set's table: a key together with its cached hash code. */
typedef struct {
long hash; /* cached hash code for the entry key */
PyObject *key; /* NULL = unused slot, dummy = dummy slot, anything else = active key */
} setentry;
/* /*
This data structure is shared by set and frozenset objects. This data structure is shared by set and frozenset objects.
*/ */
typedef struct { typedef struct _setobject PySetObject;
struct _setobject {
PyObject_HEAD PyObject_HEAD
PyObject *data;
long hash; /* only used by frozenset objects */
PyObject *weakreflist; /* List of weak references */
/* Invariants: int fill; /* # Active + # Dummy */
* data is a dictionary whose values are all True. int used; /* # Active */
* data points to the same dict for the whole life of the set.
* For frozensets only: /* The table contains mask + 1 slots, and that's a power of 2.
* data is immutable. * We store the mask instead of the size because the mask is more
* hash is the hash of the frozenset or -1 if not computed yet. * frequently needed.
*/ */
} PySetObject; int mask;
/* table points to smalltable for small tables, else to
* additional malloc'ed memory. table is never NULL! This rule
* saves repeated runtime null-tests in the workhorse getitem and
* setitem calls.
*/
setentry *table;
setentry *(*lookup)(PySetObject *so, PyObject *key, long hash);
setentry smalltable[PySet_MINSIZE];
long hash; /* only used by frozenset objects */
PyObject *weakreflist; /* List of weak references */
};
PyAPI_DATA(PyTypeObject) PySet_Type; PyAPI_DATA(PyTypeObject) PySet_Type;
PyAPI_DATA(PyTypeObject) PyFrozenSet_Type; PyAPI_DATA(PyTypeObject) PyFrozenSet_Type;
/* Invariants for frozensets only:
* the set's contents are immutable.
* hash is the hash of the frozenset or -1 if not computed yet.
*/
#define PyFrozenSet_CheckExact(ob) ((ob)->ob_type == &PyFrozenSet_Type) #define PyFrozenSet_CheckExact(ob) ((ob)->ob_type == &PyFrozenSet_Type)
#define PyAnySet_Check(ob) \ #define PyAnySet_Check(ob) \
((ob)->ob_type == &PySet_Type || (ob)->ob_type == &PyFrozenSet_Type || \ ((ob)->ob_type == &PySet_Type || (ob)->ob_type == &PyFrozenSet_Type || \

View File

@ -12,6 +12,10 @@ What's New in Python 2.5 alpha 1?
Core and builtins Core and builtins
----------------- -----------------
- The implementation of set() and frozenset() was revised to use its
own internal data structure. Memory consumption is reduced by 1/3
and there are modest speed-ups as well. The API is unchanged.
- SF bug #1238681: freed pointer is used in longobject.c:long_pow(). - SF bug #1238681: freed pointer is used in longobject.c:long_pow().
- SF bug #1229429: PyObject_CallMethod failed to decrement some - SF bug #1229429: PyObject_CallMethod failed to decrement some

File diff suppressed because it is too large Load Diff