hashtable.h now supports keys of any size

Issue #26588: hashtable.h now supports keys of any size, not only
sizeof(void*). This makes it possible to use keys larger than sizeof(void*), and
also to use less memory for keys smaller than sizeof(void*).
This commit is contained in:
Victor Stinner 2016-03-21 22:00:58 +01:00
parent 928bff0b26
commit 285cf0a601
4 changed files with 299 additions and 143 deletions

View File

@ -196,23 +196,38 @@ set_reentrant(int reentrant)
}
#endif
static int
hashtable_compare_unicode(const void *key, const _Py_hashtable_entry_t *entry)
static Py_uhash_t
hashtable_hash_pyobject(size_t key_size, const void *pkey)
{
if (key != NULL && entry->key != NULL)
return (PyUnicode_Compare((PyObject *)key, (PyObject *)entry->key) == 0);
PyObject *obj;
_Py_HASHTABLE_READ_KEY(key_size, pkey, obj);
return PyObject_Hash(obj);
}
static int
hashtable_compare_unicode(size_t key_size, const void *pkey,
const _Py_hashtable_entry_t *entry)
{
PyObject *key, *entry_key;
_Py_HASHTABLE_READ_KEY(key_size, pkey, key);
_Py_HASHTABLE_ENTRY_READ_KEY(key_size, entry, entry_key);
if (key != NULL && entry_key != NULL)
return (PyUnicode_Compare(key, entry_key) == 0);
else
return key == entry->key;
return key == entry_key;
}
static _Py_hashtable_allocator_t hashtable_alloc = {malloc, free};
static _Py_hashtable_t *
hashtable_new(size_t data_size,
hashtable_new(size_t key_size, size_t data_size,
_Py_hashtable_hash_func hash_func,
_Py_hashtable_compare_func compare_func)
{
return _Py_hashtable_new_full(data_size, 0,
return _Py_hashtable_new_full(key_size, data_size, 0,
hash_func, compare_func,
NULL, NULL, NULL, &hashtable_alloc);
}
@ -230,20 +245,25 @@ raw_free(void *ptr)
}
static Py_uhash_t
hashtable_hash_traceback(const void *key)
hashtable_hash_traceback(size_t key_size, const void *pkey)
{
const traceback_t *traceback = key;
const traceback_t *traceback;
_Py_HASHTABLE_READ_KEY(key_size, pkey, traceback);
return traceback->hash;
}
static int
hashtable_compare_traceback(const traceback_t *traceback1,
hashtable_compare_traceback(size_t key_size, const void *pkey,
const _Py_hashtable_entry_t *he)
{
const traceback_t *traceback2 = he->key;
traceback_t *traceback1, *traceback2;
const frame_t *frame1, *frame2;
int i;
_Py_HASHTABLE_READ_KEY(key_size, pkey, traceback1);
_Py_HASHTABLE_ENTRY_READ_KEY(key_size, he, traceback2);
if (traceback1->nframe != traceback2->nframe)
return 0;
@ -312,15 +332,16 @@ tracemalloc_get_frame(PyFrameObject *pyframe, frame_t *frame)
}
/* intern the filename */
entry = _Py_hashtable_get_entry(tracemalloc_filenames, filename);
entry = _Py_HASHTABLE_GET_ENTRY(tracemalloc_filenames, filename);
if (entry != NULL) {
filename = (PyObject *)entry->key;
_Py_HASHTABLE_ENTRY_READ_KEY(tracemalloc_filenames->key_size, entry,
filename);
}
else {
/* tracemalloc_filenames is responsible to keep a reference
to the filename */
Py_INCREF(filename);
if (_Py_hashtable_set(tracemalloc_filenames, filename, NULL, 0) < 0) {
if (_Py_HASHTABLE_SET_NODATA(tracemalloc_filenames, filename) < 0) {
Py_DECREF(filename);
#ifdef TRACE_DEBUG
tracemalloc_error("failed to intern the filename");
@ -403,9 +424,10 @@ traceback_new(void)
traceback->hash = traceback_hash(traceback);
/* intern the traceback */
entry = _Py_hashtable_get_entry(tracemalloc_tracebacks, traceback);
entry = _Py_HASHTABLE_GET_ENTRY(tracemalloc_tracebacks, traceback);
if (entry != NULL) {
traceback = (traceback_t *)entry->key;
_Py_HASHTABLE_ENTRY_READ_KEY(tracemalloc_tracebacks->key_size, entry,
traceback);
}
else {
traceback_t *copy;
@ -422,7 +444,7 @@ traceback_new(void)
}
memcpy(copy, traceback, traceback_size);
if (_Py_hashtable_set(tracemalloc_tracebacks, copy, NULL, 0) < 0) {
if (_Py_HASHTABLE_SET_NODATA(tracemalloc_tracebacks, copy) < 0) {
raw_free(copy);
#ifdef TRACE_DEBUG
tracemalloc_error("failed to intern the traceback: putdata failed");
@ -464,7 +486,7 @@ tracemalloc_remove_trace(void *ptr)
{
trace_t trace;
if (_Py_hashtable_pop(tracemalloc_traces, ptr, &trace, sizeof(trace))) {
if (_Py_HASHTABLE_POP(tracemalloc_traces, ptr, trace)) {
assert(tracemalloc_traced_memory >= trace.size);
tracemalloc_traced_memory -= trace.size;
}
@ -714,17 +736,23 @@ tracemalloc_raw_realloc(void *ctx, void *ptr, size_t new_size)
#endif /* TRACE_RAW_MALLOC */
static int
tracemalloc_clear_filename(_Py_hashtable_entry_t *entry, void *user_data)
tracemalloc_clear_filename(_Py_hashtable_t *ht, _Py_hashtable_entry_t *entry,
void *user_data)
{
PyObject *filename = (PyObject *)entry->key;
PyObject *filename;
_Py_HASHTABLE_ENTRY_READ_KEY(ht->key_size, entry, filename);
Py_DECREF(filename);
return 0;
}
static int
traceback_free_traceback(_Py_hashtable_entry_t *entry, void *user_data)
traceback_free_traceback(_Py_hashtable_t *ht, _Py_hashtable_entry_t *entry,
void *user_data)
{
traceback_t *traceback = (traceback_t *)entry->key;
traceback_t *traceback;
_Py_HASHTABLE_ENTRY_READ_KEY(ht->key_size, entry, traceback);
raw_free(traceback);
return 0;
}
@ -791,21 +819,20 @@ tracemalloc_init(void)
}
#endif
tracemalloc_filenames = hashtable_new(0,
(_Py_hashtable_hash_func)PyObject_Hash,
tracemalloc_filenames = hashtable_new(sizeof(PyObject *), 0,
hashtable_hash_pyobject,
hashtable_compare_unicode);
tracemalloc_tracebacks = hashtable_new(0,
(_Py_hashtable_hash_func)hashtable_hash_traceback,
(_Py_hashtable_compare_func)hashtable_compare_traceback);
tracemalloc_tracebacks = hashtable_new(sizeof(traceback_t *), 0,
hashtable_hash_traceback,
hashtable_compare_traceback);
tracemalloc_traces = hashtable_new(sizeof(trace_t),
tracemalloc_traces = hashtable_new(sizeof(void*), sizeof(trace_t),
_Py_hashtable_hash_ptr,
_Py_hashtable_compare_direct);
if (tracemalloc_filenames == NULL || tracemalloc_tracebacks == NULL
|| tracemalloc_traces == NULL)
{
|| tracemalloc_traces == NULL) {
PyErr_NoMemory();
return -1;
}
@ -840,9 +867,9 @@ tracemalloc_deinit(void)
tracemalloc_stop();
/* destroy hash tables */
_Py_hashtable_destroy(tracemalloc_traces);
_Py_hashtable_destroy(tracemalloc_tracebacks);
_Py_hashtable_destroy(tracemalloc_filenames);
_Py_hashtable_destroy(tracemalloc_traces);
#if defined(WITH_THREAD) && defined(TRACE_RAW_MALLOC)
if (tables_lock != NULL) {
@ -935,8 +962,9 @@ tracemalloc_stop(void)
PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &allocators.mem);
PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &allocators.obj);
/* release memory */
tracemalloc_clear_traces();
/* release memory */
raw_free(tracemalloc_traceback);
tracemalloc_traceback = NULL;
}
@ -1065,14 +1093,15 @@ typedef struct {
} get_traces_t;
static int
tracemalloc_get_traces_fill(_Py_hashtable_entry_t *entry, void *user_data)
tracemalloc_get_traces_fill(_Py_hashtable_t *traces, _Py_hashtable_entry_t *entry,
void *user_data)
{
get_traces_t *get_traces = user_data;
trace_t *trace;
PyObject *tracemalloc_obj;
int res;
trace = (trace_t *)_Py_HASHTABLE_ENTRY_DATA(entry);
trace = (trace_t *)_Py_HASHTABLE_ENTRY_DATA(traces, entry);
tracemalloc_obj = trace_to_pyobject(trace, get_traces->tracebacks);
if (tracemalloc_obj == NULL)
@ -1087,9 +1116,11 @@ tracemalloc_get_traces_fill(_Py_hashtable_entry_t *entry, void *user_data)
}
static int
tracemalloc_pyobject_decref_cb(_Py_hashtable_entry_t *entry, void *user_data)
tracemalloc_pyobject_decref_cb(_Py_hashtable_t *tracebacks,
_Py_hashtable_entry_t *entry,
void *user_data)
{
PyObject *obj = (PyObject *)_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry);
PyObject *obj = (PyObject *)_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(tracebacks, entry);
Py_DECREF(obj);
return 0;
}
@ -1120,7 +1151,7 @@ py_tracemalloc_get_traces(PyObject *self, PyObject *obj)
/* the traceback hash table is used temporarily to intern traceback tuple
of (filename, lineno) tuples */
get_traces.tracebacks = hashtable_new(sizeof(PyObject *),
get_traces.tracebacks = hashtable_new(sizeof(traceback_t *), sizeof(PyObject *),
_Py_hashtable_hash_ptr,
_Py_hashtable_compare_direct);
if (get_traces.tracebacks == NULL) {
@ -1152,7 +1183,7 @@ error:
finally:
if (get_traces.tracebacks != NULL) {
_Py_hashtable_foreach(get_traces.tracebacks,
tracemalloc_pyobject_decref_cb, NULL);
tracemalloc_pyobject_decref_cb, NULL);
_Py_hashtable_destroy(get_traces.tracebacks);
}
if (get_traces.traces != NULL)

View File

@ -1,5 +1,5 @@
/* The implementation of the hash table (_Py_hashtable_t) is based on the cfuhash
project:
/* The implementation of the hash table (_Py_hashtable_t) is based on the
cfuhash project:
http://sourceforge.net/projects/libcfu/
Copyright of cfuhash:
@ -59,7 +59,7 @@
#define ENTRY_NEXT(ENTRY) \
((_Py_hashtable_entry_t *)_Py_SLIST_ITEM_NEXT(ENTRY))
#define HASHTABLE_ITEM_SIZE(HT) \
(sizeof(_Py_hashtable_entry_t) + (HT)->data_size)
(sizeof(_Py_hashtable_entry_t) + (HT)->key_size + (HT)->data_size)
/* Forward declaration */
static void hashtable_rehash(_Py_hashtable_t *ht);
@ -70,6 +70,7 @@ _Py_slist_init(_Py_slist_t *list)
list->head = NULL;
}
static void
_Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item)
{
@ -77,6 +78,7 @@ _Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item)
list->head = item;
}
static void
_Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous,
_Py_slist_item_t *item)
@ -87,24 +89,26 @@ _Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous,
list->head = item->next;
}
Py_uhash_t
_Py_hashtable_hash_int(const void *key)
{
return (Py_uhash_t)key;
}
Py_uhash_t
_Py_hashtable_hash_ptr(const void *key)
_Py_hashtable_hash_ptr(size_t key_size, const void *pkey)
{
void *key;
_Py_HASHTABLE_READ_KEY(key_size, pkey, key);
return (Py_uhash_t)_Py_HashPointer((void *)key);
}
int
_Py_hashtable_compare_direct(const void *key, const _Py_hashtable_entry_t *entry)
_Py_hashtable_compare_direct(size_t key_size, const void *pkey,
const _Py_hashtable_entry_t *entry)
{
return entry->key == key;
const void *pkey2 = _Py_HASHTABLE_ENTRY_KEY(entry);
return (memcmp(pkey, pkey2, key_size) == 0);
}
/* makes sure the real size of the buckets array is a power of 2 */
static size_t
round_size(size_t s)
@ -118,8 +122,10 @@ round_size(size_t s)
return i;
}
_Py_hashtable_t *
_Py_hashtable_new_full(size_t data_size, size_t init_size,
_Py_hashtable_new_full(size_t key_size, size_t data_size,
size_t init_size,
_Py_hashtable_hash_func hash_func,
_Py_hashtable_compare_func compare_func,
_Py_hashtable_copy_data_func copy_data_func,
@ -144,6 +150,7 @@ _Py_hashtable_new_full(size_t data_size, size_t init_size,
ht->num_buckets = round_size(init_size);
ht->entries = 0;
ht->key_size = key_size;
ht->data_size = data_size;
buckets_size = ht->num_buckets * sizeof(ht->buckets[0]);
@ -163,16 +170,19 @@ _Py_hashtable_new_full(size_t data_size, size_t init_size,
return ht;
}
_Py_hashtable_t *
_Py_hashtable_new(size_t data_size,
_Py_hashtable_new(size_t key_size, size_t data_size,
_Py_hashtable_hash_func hash_func,
_Py_hashtable_compare_func compare_func)
{
return _Py_hashtable_new_full(data_size, HASHTABLE_MIN_SIZE,
return _Py_hashtable_new_full(key_size, data_size,
HASHTABLE_MIN_SIZE,
hash_func, compare_func,
NULL, NULL, NULL, NULL);
}
size_t
_Py_hashtable_size(_Py_hashtable_t *ht)
{
@ -195,7 +205,7 @@ _Py_hashtable_size(_Py_hashtable_t *ht)
for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) {
void *data;
data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry);
data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(ht, entry);
size += ht->get_data_size_func(data);
}
}
@ -203,6 +213,7 @@ _Py_hashtable_size(_Py_hashtable_t *ht)
return size;
}
#ifdef Py_DEBUG
void
_Py_hashtable_print_stats(_Py_hashtable_t *ht)
@ -243,38 +254,47 @@ _Py_hashtable_print_stats(_Py_hashtable_t *ht)
}
#endif
/* Get an entry. Return NULL if the key does not exist. */
_Py_hashtable_entry_t *
_Py_hashtable_get_entry(_Py_hashtable_t *ht, const void *key)
_Py_hashtable_get_entry(_Py_hashtable_t *ht,
size_t key_size, const void *pkey)
{
Py_uhash_t key_hash;
size_t index;
_Py_hashtable_entry_t *entry;
key_hash = ht->hash_func(key);
assert(key_size == ht->key_size);
key_hash = ht->hash_func(key_size, pkey);
index = key_hash & (ht->num_buckets - 1);
for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) {
if (entry->key_hash == key_hash && ht->compare_func(key, entry))
if (entry->key_hash == key_hash
&& ht->compare_func(key_size, pkey, entry))
break;
}
return entry;
}
static int
_hashtable_pop_entry(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size)
_Py_hashtable_pop_entry(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
void *data, size_t data_size)
{
Py_uhash_t key_hash;
size_t index;
_Py_hashtable_entry_t *entry, *previous;
key_hash = ht->hash_func(key);
assert(key_size == ht->key_size);
key_hash = ht->hash_func(key_size, pkey);
index = key_hash & (ht->num_buckets - 1);
previous = NULL;
for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) {
if (entry->key_hash == key_hash && ht->compare_func(key, entry))
if (entry->key_hash == key_hash
&& ht->compare_func(key_size, pkey, entry))
break;
previous = entry;
}
@ -287,7 +307,7 @@ _hashtable_pop_entry(_Py_hashtable_t *ht, const void *key, void *data, size_t da
ht->entries--;
if (data != NULL)
_Py_HASHTABLE_ENTRY_READ_DATA(ht, data, data_size, entry);
_Py_HASHTABLE_ENTRY_READ_DATA(ht, entry, data_size, data);
ht->alloc.free(entry);
if ((float)ht->entries / (float)ht->num_buckets < HASHTABLE_LOW)
@ -295,26 +315,27 @@ _hashtable_pop_entry(_Py_hashtable_t *ht, const void *key, void *data, size_t da
return 1;
}
/* Add a new entry to the hash. The key must not be present in the hash table.
Return 0 on success, -1 on memory error. */
int
_Py_hashtable_set(_Py_hashtable_t *ht, const void *key,
void *data, size_t data_size)
_Py_hashtable_set(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
size_t data_size, void *data)
{
Py_uhash_t key_hash;
size_t index;
_Py_hashtable_entry_t *entry;
assert(key_size == ht->key_size);
assert(data != NULL || data_size == 0);
#ifndef NDEBUG
/* Don't write the assertion on a single line because it is interesting
to know the duplicated entry if the assertion failed. The entry can
be read using a debugger. */
entry = _Py_hashtable_get_entry(ht, key);
entry = _Py_hashtable_get_entry(ht, key_size, pkey);
assert(entry == NULL);
#endif
key_hash = ht->hash_func(key);
key_hash = ht->hash_func(key_size, pkey);
index = key_hash & (ht->num_buckets - 1);
entry = ht->alloc.malloc(HASHTABLE_ITEM_SIZE(ht));
@ -323,11 +344,11 @@ _Py_hashtable_set(_Py_hashtable_t *ht, const void *key,
return -1;
}
entry->key = (void *)key;
entry->key_hash = key_hash;
memcpy((void *)_Py_HASHTABLE_ENTRY_KEY(entry), pkey, key_size);
assert(data_size == ht->data_size);
memcpy(_Py_HASHTABLE_ENTRY_DATA(entry), data, data_size);
memcpy(_Py_HASHTABLE_ENTRY_DATA(ht, entry), data, data_size);
_Py_slist_prepend(&ht->buckets[index], (_Py_slist_item_t*)entry);
ht->entries++;
@ -337,48 +358,48 @@ _Py_hashtable_set(_Py_hashtable_t *ht, const void *key,
return 0;
}
/* Get data from an entry. Copy entry data into data and return 1 if the entry
exists, return 0 if the entry does not exist. */
int
_Py_hashtable_get(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size)
_Py_hashtable_get(_Py_hashtable_t *ht, size_t key_size,const void *pkey,
size_t data_size, void *data)
{
_Py_hashtable_entry_t *entry;
assert(data != NULL);
entry = _Py_hashtable_get_entry(ht, key);
entry = _Py_hashtable_get_entry(ht, key_size, pkey);
if (entry == NULL)
return 0;
_Py_HASHTABLE_ENTRY_READ_DATA(ht, data, data_size, entry);
_Py_HASHTABLE_ENTRY_READ_DATA(ht, entry, data_size, data);
return 1;
}
int
_Py_hashtable_pop(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size)
_Py_hashtable_pop(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
size_t data_size, void *data)
{
assert(data != NULL);
assert(ht->free_data_func == NULL);
return _hashtable_pop_entry(ht, key, data, data_size);
return _Py_hashtable_pop_entry(ht, key_size, pkey, data, data_size);
}
/* Delete an entry. The entry must exist. */
void
_Py_hashtable_delete(_Py_hashtable_t *ht, const void *key)
_Py_hashtable_delete(_Py_hashtable_t *ht, size_t key_size, const void *pkey)
{
#ifndef NDEBUG
int found = _hashtable_pop_entry(ht, key, NULL, 0);
int found = _Py_hashtable_pop_entry(ht, key_size, pkey, NULL, 0);
assert(found);
#else
(void)_hashtable_pop_entry(ht, key, NULL, 0);
(void)_Py_hashtable_pop_entry(ht, key_size, pkey, NULL, 0);
#endif
}
/* Prototype for a pointer to a function to be called foreach
key/value pair in the hash by hashtable_foreach(). Iteration
stops if a non-zero value is returned. */
int
_Py_hashtable_foreach(_Py_hashtable_t *ht,
int (*func) (_Py_hashtable_entry_t *entry, void *arg),
_Py_hashtable_foreach_func func,
void *arg)
{
_Py_hashtable_entry_t *entry;
@ -386,7 +407,7 @@ _Py_hashtable_foreach(_Py_hashtable_t *ht,
for (hv = 0; hv < ht->num_buckets; hv++) {
for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) {
int res = func(entry, arg);
int res = func(ht, entry, arg);
if (res)
return res;
}
@ -394,9 +415,11 @@ _Py_hashtable_foreach(_Py_hashtable_t *ht,
return 0;
}
static void
hashtable_rehash(_Py_hashtable_t *ht)
{
const size_t key_size = ht->key_size;
size_t buckets_size, new_size, bucket;
_Py_slist_t *old_buckets = NULL;
size_t old_num_buckets;
@ -425,7 +448,8 @@ hashtable_rehash(_Py_hashtable_t *ht)
for (entry = BUCKETS_HEAD(old_buckets[bucket]); entry != NULL; entry = next) {
size_t entry_index;
assert(ht->hash_func(entry->key) == entry->key_hash);
assert(ht->hash_func(key_size, _Py_HASHTABLE_ENTRY_KEY(entry)) == entry->key_hash);
next = ENTRY_NEXT(entry);
entry_index = entry->key_hash & (new_size - 1);
@ -436,6 +460,7 @@ hashtable_rehash(_Py_hashtable_t *ht)
ht->alloc.free(old_buckets);
}
void
_Py_hashtable_clear(_Py_hashtable_t *ht)
{
@ -446,7 +471,7 @@ _Py_hashtable_clear(_Py_hashtable_t *ht)
for (entry = TABLE_HEAD(ht, i); entry != NULL; entry = next) {
next = ENTRY_NEXT(entry);
if (ht->free_data_func)
ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry));
ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(ht, entry));
ht->alloc.free(entry);
}
_Py_slist_init(&ht->buckets[i]);
@ -455,6 +480,7 @@ _Py_hashtable_clear(_Py_hashtable_t *ht)
hashtable_rehash(ht);
}
void
_Py_hashtable_destroy(_Py_hashtable_t *ht)
{
@ -465,7 +491,7 @@ _Py_hashtable_destroy(_Py_hashtable_t *ht)
while (entry) {
_Py_slist_item_t *entry_next = entry->next;
if (ht->free_data_func)
ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry));
ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(ht, entry));
ht->alloc.free(entry);
entry = entry_next;
}
@ -475,17 +501,20 @@ _Py_hashtable_destroy(_Py_hashtable_t *ht)
ht->alloc.free(ht);
}
/* Return a copy of the hash table */
_Py_hashtable_t *
_Py_hashtable_copy(_Py_hashtable_t *src)
{
const size_t key_size = src->key_size;
const size_t data_size = src->data_size;
_Py_hashtable_t *dst;
_Py_hashtable_entry_t *entry;
size_t bucket;
int err;
void *data, *new_data;
dst = _Py_hashtable_new_full(src->data_size, src->num_buckets,
dst = _Py_hashtable_new_full(key_size, data_size,
src->num_buckets,
src->hash_func, src->compare_func,
src->copy_data_func, src->free_data_func,
src->get_data_size_func, &src->alloc);
@ -496,17 +525,20 @@ _Py_hashtable_copy(_Py_hashtable_t *src)
entry = TABLE_HEAD(src, bucket);
for (; entry; entry = ENTRY_NEXT(entry)) {
if (src->copy_data_func) {
data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry);
data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(src, entry);
new_data = src->copy_data_func(data);
if (new_data != NULL)
err = _Py_hashtable_set(dst, entry->key,
&new_data, src->data_size);
err = _Py_hashtable_set(dst, key_size,
_Py_HASHTABLE_ENTRY_KEY(entry),
data_size, &new_data);
else
err = 1;
}
else {
data = _Py_HASHTABLE_ENTRY_DATA(entry);
err = _Py_hashtable_set(dst, entry->key, data, src->data_size);
data = _Py_HASHTABLE_ENTRY_DATA(src, entry);
err = _Py_hashtable_set(dst, key_size,
_Py_HASHTABLE_ENTRY_KEY(entry),
data_size, data);
}
if (err) {
_Py_hashtable_destroy(dst);
@ -516,4 +548,3 @@ _Py_hashtable_copy(_Py_hashtable_t *src)
}
return dst;
}

View File

@ -1,9 +1,10 @@
#ifndef Py_HASHTABLE_H
#define Py_HASHTABLE_H
/* The whole API is private */
#ifndef Py_LIMITED_API
/* Single linked list */
typedef struct _Py_slist_item_s {
struct _Py_slist_item_s *next;
} _Py_slist_item_t;
@ -16,30 +17,55 @@ typedef struct {
#define _Py_SLIST_HEAD(SLIST) (((_Py_slist_t *)SLIST)->head)
/* _Py_hashtable: table entry */
typedef struct {
/* used by _Py_hashtable_t.buckets to link entries */
_Py_slist_item_t _Py_slist_item;
const void *key;
Py_uhash_t key_hash;
/* data follows */
/* key (key_size bytes) and then data (data_size bytes) follows */
} _Py_hashtable_entry_t;
#define _Py_HASHTABLE_ENTRY_DATA(ENTRY) \
((char *)(ENTRY) + sizeof(_Py_hashtable_entry_t))
#define _Py_HASHTABLE_ENTRY_KEY(ENTRY) \
((const void *)((char *)(ENTRY) + sizeof(_Py_hashtable_entry_t)))
#define _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(ENTRY) \
(*(void **)_Py_HASHTABLE_ENTRY_DATA(ENTRY))
#define _Py_HASHTABLE_ENTRY_DATA(TABLE, ENTRY) \
((char *)(ENTRY) + sizeof(_Py_hashtable_entry_t) + (TABLE)->key_size)
#define _Py_HASHTABLE_ENTRY_READ_DATA(TABLE, DATA, DATA_SIZE, ENTRY) \
#define _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(TABLE, ENTRY) \
(*(void **)_Py_HASHTABLE_ENTRY_DATA(TABLE, ENTRY))
/* Get a key value from pkey: use memcpy() rather than a pointer dereference
to avoid memory alignment issues. */
#define _Py_HASHTABLE_READ_KEY(KEY_SIZE, PKEY, DST_KEY) \
do { \
assert((DATA_SIZE) == (TABLE)->data_size); \
memcpy(DATA, _Py_HASHTABLE_ENTRY_DATA(ENTRY), DATA_SIZE); \
assert(sizeof(DST_KEY) == (KEY_SIZE)); \
memcpy(&(DST_KEY), (PKEY), sizeof(DST_KEY)); \
} while (0)
typedef Py_uhash_t (*_Py_hashtable_hash_func) (const void *key);
typedef int (*_Py_hashtable_compare_func) (const void *key, const _Py_hashtable_entry_t *he);
/* Copy the key stored in ENTRY into the variable KEY.
   KEY must be an lvalue whose size equals KEY_SIZE (checked by assert());
   the key bytes are copied with memcpy() from the location returned by
   _Py_HASHTABLE_ENTRY_KEY(ENTRY). */
#define _Py_HASHTABLE_ENTRY_READ_KEY(KEY_SIZE, ENTRY, KEY) \
do { \
assert(sizeof(KEY) == (KEY_SIZE)); \
memcpy(&(KEY), _Py_HASHTABLE_ENTRY_KEY(ENTRY), sizeof(KEY)); \
} while (0)
/* Copy DATA_SIZE bytes of the data stored in ENTRY into the buffer DATA.
   DATA is a pointer to the destination buffer; DATA_SIZE must equal
   (TABLE)->data_size (checked by assert()).
   Argument order is (TABLE, ENTRY, DATA_SIZE, DATA). */
#define _Py_HASHTABLE_ENTRY_READ_DATA(TABLE, ENTRY, DATA_SIZE, DATA) \
do { \
assert((DATA_SIZE) == (TABLE)->data_size); \
memcpy(DATA, _Py_HASHTABLE_ENTRY_DATA(TABLE, ENTRY), DATA_SIZE); \
} while (0)
/* _Py_hashtable: prototypes */
typedef Py_uhash_t (*_Py_hashtable_hash_func) (size_t key_size,
const void *pkey);
typedef int (*_Py_hashtable_compare_func) (size_t key_size,
const void *pkey,
const _Py_hashtable_entry_t *he);
typedef void* (*_Py_hashtable_copy_data_func)(void *data);
typedef void (*_Py_hashtable_free_data_func)(void *data);
typedef size_t (*_Py_hashtable_get_data_size_func)(void *data);
@ -52,10 +78,14 @@ typedef struct {
void (*free) (void *ptr);
} _Py_hashtable_allocator_t;
/* _Py_hashtable: table */
typedef struct {
size_t num_buckets;
size_t entries; /* Total number of entries in the table. */
_Py_slist_t *buckets;
size_t key_size;
size_t data_size;
_Py_hashtable_hash_func hash_func;
@ -66,16 +96,25 @@ typedef struct {
_Py_hashtable_allocator_t alloc;
} _Py_hashtable_t;
/* hash and compare functions for integers and pointers */
PyAPI_FUNC(Py_uhash_t) _Py_hashtable_hash_ptr(const void *key);
PyAPI_FUNC(Py_uhash_t) _Py_hashtable_hash_int(const void *key);
PyAPI_FUNC(int) _Py_hashtable_compare_direct(const void *key, const _Py_hashtable_entry_t *entry);
/* hash a pointer (void*) */
PyAPI_FUNC(Py_uhash_t) _Py_hashtable_hash_ptr(
size_t key_size,
const void *pkey);
/* comparison using memcmp() */
PyAPI_FUNC(int) _Py_hashtable_compare_direct(
size_t key_size,
const void *pkey,
const _Py_hashtable_entry_t *entry);
PyAPI_FUNC(_Py_hashtable_t *) _Py_hashtable_new(
size_t key_size,
size_t data_size,
_Py_hashtable_hash_func hash_func,
_Py_hashtable_compare_func compare_func);
PyAPI_FUNC(_Py_hashtable_t *) _Py_hashtable_new_full(
size_t key_size,
size_t data_size,
size_t init_size,
_Py_hashtable_hash_func hash_func,
@ -84,45 +123,95 @@ PyAPI_FUNC(_Py_hashtable_t *) _Py_hashtable_new_full(
_Py_hashtable_free_data_func free_data_func,
_Py_hashtable_get_data_size_func get_data_size_func,
_Py_hashtable_allocator_t *allocator);
PyAPI_FUNC(_Py_hashtable_t *) _Py_hashtable_copy(_Py_hashtable_t *src);
PyAPI_FUNC(void) _Py_hashtable_clear(_Py_hashtable_t *ht);
PyAPI_FUNC(void) _Py_hashtable_destroy(_Py_hashtable_t *ht);
typedef int (*_Py_hashtable_foreach_func) (_Py_hashtable_entry_t *entry, void *arg);
/* Return a copy of the hash table */
PyAPI_FUNC(_Py_hashtable_t *) _Py_hashtable_copy(_Py_hashtable_t *src);
PyAPI_FUNC(void) _Py_hashtable_clear(_Py_hashtable_t *ht);
typedef int (*_Py_hashtable_foreach_func) (_Py_hashtable_t *ht,
_Py_hashtable_entry_t *entry,
void *arg);
/* Call func() on each entry of the hashtable.
Iteration stops if func() result is non-zero, in this case it's the result
of the call. Otherwise, the function returns 0. */
PyAPI_FUNC(int) _Py_hashtable_foreach(
_Py_hashtable_t *ht,
_Py_hashtable_foreach_func func, void *arg);
_Py_hashtable_foreach_func func,
void *arg);
PyAPI_FUNC(size_t) _Py_hashtable_size(_Py_hashtable_t *ht);
PyAPI_FUNC(_Py_hashtable_entry_t*) _Py_hashtable_get_entry(
_Py_hashtable_t *ht,
const void *key);
/* Add a new entry to the hash. The key must not be present in the hash table.
Return 0 on success, -1 on memory error.
Don't call this function directly;
use the _Py_HASHTABLE_SET() and _Py_HASHTABLE_SET_NODATA() macros instead. */
PyAPI_FUNC(int) _Py_hashtable_set(
_Py_hashtable_t *ht,
const void *key,
void *data,
size_t data_size);
PyAPI_FUNC(int) _Py_hashtable_get(
_Py_hashtable_t *ht,
const void *key,
void *data,
size_t data_size);
PyAPI_FUNC(int) _Py_hashtable_pop(
_Py_hashtable_t *ht,
const void *key,
void *data,
size_t data_size);
PyAPI_FUNC(void) _Py_hashtable_delete(
_Py_hashtable_t *ht,
const void *key);
size_t key_size,
const void *pkey,
size_t data_size,
void *data);
#define _Py_HASHTABLE_SET(TABLE, KEY, DATA) \
_Py_hashtable_set(TABLE, KEY, &(DATA), sizeof(DATA))
_Py_hashtable_set(TABLE, sizeof(KEY), &KEY, sizeof(DATA), &(DATA))
/* Add KEY to TABLE with no associated data: the data size passed is 0 and
   the data pointer is NULL.
   KEY must be an lvalue: both sizeof(KEY) and its address are passed to
   _Py_hashtable_set(). The argument is parenthesized before taking its
   address so that the expansion does not depend on operator precedence. */
#define _Py_HASHTABLE_SET_NODATA(TABLE, KEY) \
_Py_hashtable_set(TABLE, sizeof(KEY), &(KEY), 0, NULL)
/* Get an entry.
Return NULL if the key does not exist.
Don't call this function directly; use the _Py_HASHTABLE_GET_ENTRY()
macro instead. */
PyAPI_FUNC(_Py_hashtable_entry_t*) _Py_hashtable_get_entry(
_Py_hashtable_t *ht,
size_t key_size,
const void *pkey);
#define _Py_HASHTABLE_GET_ENTRY(TABLE, KEY) \
_Py_hashtable_get_entry(TABLE, sizeof(KEY), &(KEY))
/* Get data from an entry. Copy entry data into data and return 1 if the entry
exists, return 0 if the entry does not exist.
Don't call this function directly; use the _Py_HASHTABLE_GET() macro instead. */
PyAPI_FUNC(int) _Py_hashtable_get(
_Py_hashtable_t *ht,
size_t key_size,
const void *pkey,
size_t data_size,
void *data);
#define _Py_HASHTABLE_GET(TABLE, KEY, DATA) \
_Py_hashtable_get(TABLE, KEY, &(DATA), sizeof(DATA))
_Py_hashtable_get(TABLE, sizeof(KEY), &(KEY), sizeof(DATA), &(DATA))
/* Don't call this function directly; use the _Py_HASHTABLE_POP() macro instead. */
PyAPI_FUNC(int) _Py_hashtable_pop(
_Py_hashtable_t *ht,
size_t key_size,
const void *pkey,
size_t data_size,
void *data);
#define _Py_HASHTABLE_POP(TABLE, KEY, DATA) \
_Py_hashtable_pop(TABLE, sizeof(KEY), &(KEY), sizeof(DATA), &(DATA))
/* Delete an entry.
WARNING: The entry must exist. */
PyAPI_FUNC(void) _Py_hashtable_delete(
_Py_hashtable_t *ht,
size_t key_size,
const void *pkey);
#endif /* Py_LIMITED_API */
#endif

View File

@ -263,10 +263,10 @@ w_ref(PyObject *v, char *flag, WFILE *p)
if (Py_REFCNT(v) == 1)
return 0;
entry = _Py_hashtable_get_entry(p->hashtable, v);
entry = _Py_HASHTABLE_GET_ENTRY(p->hashtable, v);
if (entry != NULL) {
/* write the reference index to the stream */
_Py_HASHTABLE_ENTRY_READ_DATA(p->hashtable, &w, sizeof(w), entry);
_Py_HASHTABLE_ENTRY_READ_DATA(p->hashtable, entry, sizeof(w), &w);
/* we don't store "long" indices in the dict */
assert(0 <= w && w <= 0x7fffffff);
w_byte(TYPE_REF, p);
@ -571,7 +571,8 @@ static int
w_init_refs(WFILE *wf, int version)
{
if (version >= 3) {
wf->hashtable = _Py_hashtable_new(sizeof(int), _Py_hashtable_hash_ptr,
wf->hashtable = _Py_hashtable_new(sizeof(PyObject *), sizeof(int),
_Py_hashtable_hash_ptr,
_Py_hashtable_compare_direct);
if (wf->hashtable == NULL) {
PyErr_NoMemory();
@ -582,9 +583,13 @@ w_init_refs(WFILE *wf, int version)
}
static int
w_decref_entry(_Py_hashtable_entry_t *entry, void *Py_UNUSED(data))
w_decref_entry(_Py_hashtable_t *ht, _Py_hashtable_entry_t *entry,
void *Py_UNUSED(data))
{
Py_XDECREF(entry->key);
PyObject *entry_key;
_Py_HASHTABLE_ENTRY_READ_KEY(ht->key_size, entry, entry_key);
Py_XDECREF(entry_key);
return 0;
}