* Bring in INIT_NONZERO_SET_SLOTS macro from dictionary code.

* Bring in free list from dictionary code.
* Improve several comments.
* Differencing can leave many dummy entries.  If more than
  1/6 are dummies, then resize them away.
* Factor out common code with new macro, PyAnySet_CheckExact.
This commit is contained in:
Raymond Hettinger 2005-08-07 13:02:53 +00:00
parent e9fe7e0ef3
commit bc841a1464
2 changed files with 56 additions and 19 deletions

View File

@ -59,12 +59,16 @@ struct _setobject {
PyAPI_DATA(PyTypeObject) PySet_Type; PyAPI_DATA(PyTypeObject) PySet_Type;
PyAPI_DATA(PyTypeObject) PyFrozenSet_Type; PyAPI_DATA(PyTypeObject) PyFrozenSet_Type;
/* Invariants for frozensets only: /* Invariants for frozensets:
* data is immutable. * data is immutable.
* hash is the hash of the frozenset or -1 if not computed yet. * hash is the hash of the frozenset or -1 if not computed yet.
* Invariants for sets:
* hash is -1
*/ */
#define PyFrozenSet_CheckExact(ob) ((ob)->ob_type == &PyFrozenSet_Type) #define PyFrozenSet_CheckExact(ob) ((ob)->ob_type == &PyFrozenSet_Type)
/* True when ob's type is exactly PySet_Type or PyFrozenSet_Type.
 * Only the two exact type pointers are compared; no subtype test is made.
 * Note that ob is evaluated twice.
 */
#define PyAnySet_CheckExact(ob) \
((ob)->ob_type == &PySet_Type || (ob)->ob_type == &PyFrozenSet_Type)
#define PyAnySet_Check(ob) \ #define PyAnySet_Check(ob) \
((ob)->ob_type == &PySet_Type || (ob)->ob_type == &PyFrozenSet_Type || \ ((ob)->ob_type == &PySet_Type || (ob)->ob_type == &PyFrozenSet_Type || \
PyType_IsSubtype((ob)->ob_type, &PySet_Type) || \ PyType_IsSubtype((ob)->ob_type, &PySet_Type) || \

View File

@ -15,13 +15,22 @@
/* Object used as dummy key to fill deleted entries */ /* Object used as dummy key to fill deleted entries */
static PyObject *dummy = NULL; /* Initialized by first call to make_new_set() */ static PyObject *dummy = NULL; /* Initialized by first call to make_new_set() */
/* Initialize the set fields whose starting value is not zero:
 *   table -> the embedded smalltable
 *   mask  -> PySet_MINSIZE - 1
 *   hash  -> -1
 * The fill and used counters and the smalltable contents are not touched
 * here. Wrapped in do { } while(0) so the macro acts as a single statement.
 */
#define INIT_NONZERO_SET_SLOTS(so) do { \
(so)->table = (so)->smalltable; \
(so)->mask = PySet_MINSIZE - 1; \
(so)->hash = -1; \
} while(0)
#define EMPTY_TO_MINSIZE(so) do { \ #define EMPTY_TO_MINSIZE(so) do { \
memset((so)->smalltable, 0, sizeof((so)->smalltable)); \ memset((so)->smalltable, 0, sizeof((so)->smalltable)); \
(so)->used = (so)->fill = 0; \ (so)->used = (so)->fill = 0; \
(so)->table = (so)->smalltable; \ INIT_NONZERO_SET_SLOTS(so); \
(so)->mask = PySet_MINSIZE - 1; \
} while(0) } while(0)
/* Reuse scheme to save calls to malloc, free, and memset */
#define MAXFREESETS 80
static PySetObject *free_sets[MAXFREESETS];
static int num_free_sets = 0;
/* /*
The basic lookup function used by all operations. The basic lookup function used by all operations.
@ -30,13 +39,15 @@ Open addressing is preferred over chaining since the link overhead for
chaining would be substantial (100% with typical malloc overhead). chaining would be substantial (100% with typical malloc overhead).
The initial probe index is computed as hash mod the table size. Subsequent The initial probe index is computed as hash mod the table size. Subsequent
probe indices are computed as explained earlier. probe indices are computed as explained in Objects/dictobject.c.
All arithmetic on hash should ignore overflow. All arithmetic on hash should ignore overflow.
This function must never return NULL; failures are indicated by returning The lookup function always succeeds and never returns NULL. This simplifies
a setentry* for which the value field is NULL. Exceptions are never and speeds up client functions, which won't have to test for and handle
reported by this function, and outstanding exceptions are maintained. errors. To meet that requirement, any errors generated by a user defined
__cmp__() function are simply cleared and ignored.
Previously outstanding exceptions are maintained.
*/ */
static setentry * static setentry *
@ -187,7 +198,7 @@ set_lookkey_string(PySetObject *so, PyObject *key, register long hash)
freeslot = entry; freeslot = entry;
} }
} else { } else {
/* Simplified loop that can assume are no dummy entries */ /* Simplified loop when there are no dummy entries. */
if (entry->hash == hash && _PyString_Eq(entry->key, key)) if (entry->hash == hash && _PyString_Eq(entry->key, key))
return entry; return entry;
for (perturb = hash; ; perturb >>= PERTURB_SHIFT) { for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
@ -347,7 +358,7 @@ set_add_internal(register PySetObject *so, PyObject *key)
set_insert_key(so, key, hash); set_insert_key(so, key, hash);
if (!(so->used > n_used && so->fill*3 >= (so->mask+1)*2)) if (!(so->used > n_used && so->fill*3 >= (so->mask+1)*2))
return 0; return 0;
return set_table_resize(so, so->used*(so->used>50000 ? 2 : 4)); return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
} }
#define DISCARD_NOTFOUND 0 #define DISCARD_NOTFOUND 0
@ -439,7 +450,6 @@ set_clear_internal(PySetObject *so)
if (table_is_malloced) if (table_is_malloced)
PyMem_DEL(table); PyMem_DEL(table);
so->hash = -1;
return 0; return 0;
} }
@ -710,16 +720,24 @@ make_new_set(PyTypeObject *type, PyObject *iterable)
} }
/* create PySetObject structure */ /* create PySetObject structure */
so = (PySetObject *)type->tp_alloc(type, 0); if (num_free_sets &&
if (so == NULL) (type == &PySet_Type || type == &PyFrozenSet_Type)) {
return NULL; so = free_sets[--num_free_sets];
assert (so != NULL && PyAnySet_CheckExact(so));
so->ob_type = type;
_Py_NewReference((PyObject *)so);
EMPTY_TO_MINSIZE(so);
PyObject_GC_Track(so);
} else {
so = (PySetObject *)type->tp_alloc(type, 0);
if (so == NULL)
return NULL;
/* tp_alloc has already zeroed the structure */
assert(so->table == NULL && so->fill == 0 && so->used == 0);
INIT_NONZERO_SET_SLOTS(so);
}
/* tp_alloc has already zeroed the structure */
assert(so->table == NULL && so->fill == 0 && so->used == 0);
so->table = so->smalltable;
so->mask = PySet_MINSIZE - 1;
so->lookup = set_lookkey_string; so->lookup = set_lookkey_string;
so->hash = -1;
so->weakreflist = NULL; so->weakreflist = NULL;
if (iterable != NULL) { if (iterable != NULL) {
@ -767,6 +785,13 @@ frozenset_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
void void
PySet_Fini(void) PySet_Fini(void)
{ {
PySetObject *so;
while (num_free_sets) {
num_free_sets--;
so = free_sets[num_free_sets];
PyObject_GC_Del(so);
}
Py_XDECREF(dummy); Py_XDECREF(dummy);
Py_XDECREF(emptyfrozenset); Py_XDECREF(emptyfrozenset);
} }
@ -797,7 +822,10 @@ set_dealloc(PySetObject *so)
if (so->table != so->smalltable) if (so->table != so->smalltable)
PyMem_DEL(so->table); PyMem_DEL(so->table);
so->ob_type->tp_free(so); if (num_free_sets < MAXFREESETS && PyAnySet_CheckExact(so))
free_sets[num_free_sets++] = so;
else
so->ob_type->tp_free(so);
Py_TRASHCAN_SAFE_END(so) Py_TRASHCAN_SAFE_END(so)
} }
@ -1079,6 +1107,11 @@ set_difference_update(PySetObject *so, PyObject *other)
Py_DECREF(it); Py_DECREF(it);
if (PyErr_Occurred()) if (PyErr_Occurred())
return NULL; return NULL;
/* If more than 1/6 are dummies, then resize them away. */
if ((so->fill - so->used) * 6 < so->mask)
Py_RETURN_NONE;
if (set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4) == -1)
return NULL;
Py_RETURN_NONE; Py_RETURN_NONE;
} }