hashtable.h now supports keys of any size

Issue #26588: hashtable.h now supports keys of any size, not only
sizeof(void*). This makes it possible to use keys larger than sizeof(void*),
and also to use less memory for keys smaller than sizeof(void*).
This commit is contained in:
Victor Stinner 2016-03-21 22:00:58 +01:00
parent 928bff0b26
commit 285cf0a601
4 changed files with 299 additions and 143 deletions

View File

@ -196,23 +196,38 @@ set_reentrant(int reentrant)
} }
#endif #endif
static int static Py_uhash_t
hashtable_compare_unicode(const void *key, const _Py_hashtable_entry_t *entry) hashtable_hash_pyobject(size_t key_size, const void *pkey)
{ {
if (key != NULL && entry->key != NULL) PyObject *obj;
return (PyUnicode_Compare((PyObject *)key, (PyObject *)entry->key) == 0);
_Py_HASHTABLE_READ_KEY(key_size, pkey, obj);
return PyObject_Hash(obj);
}
static int
hashtable_compare_unicode(size_t key_size, const void *pkey,
const _Py_hashtable_entry_t *entry)
{
PyObject *key, *entry_key;
_Py_HASHTABLE_READ_KEY(key_size, pkey, key);
_Py_HASHTABLE_ENTRY_READ_KEY(key_size, entry, entry_key);
if (key != NULL && entry_key != NULL)
return (PyUnicode_Compare(key, entry_key) == 0);
else else
return key == entry->key; return key == entry_key;
} }
static _Py_hashtable_allocator_t hashtable_alloc = {malloc, free}; static _Py_hashtable_allocator_t hashtable_alloc = {malloc, free};
static _Py_hashtable_t * static _Py_hashtable_t *
hashtable_new(size_t data_size, hashtable_new(size_t key_size, size_t data_size,
_Py_hashtable_hash_func hash_func, _Py_hashtable_hash_func hash_func,
_Py_hashtable_compare_func compare_func) _Py_hashtable_compare_func compare_func)
{ {
return _Py_hashtable_new_full(data_size, 0, return _Py_hashtable_new_full(key_size, data_size, 0,
hash_func, compare_func, hash_func, compare_func,
NULL, NULL, NULL, &hashtable_alloc); NULL, NULL, NULL, &hashtable_alloc);
} }
@ -230,20 +245,25 @@ raw_free(void *ptr)
} }
static Py_uhash_t static Py_uhash_t
hashtable_hash_traceback(const void *key) hashtable_hash_traceback(size_t key_size, const void *pkey)
{ {
const traceback_t *traceback = key; const traceback_t *traceback;
_Py_HASHTABLE_READ_KEY(key_size, pkey, traceback);
return traceback->hash; return traceback->hash;
} }
static int static int
hashtable_compare_traceback(const traceback_t *traceback1, hashtable_compare_traceback(size_t key_size, const void *pkey,
const _Py_hashtable_entry_t *he) const _Py_hashtable_entry_t *he)
{ {
const traceback_t *traceback2 = he->key; traceback_t *traceback1, *traceback2;
const frame_t *frame1, *frame2; const frame_t *frame1, *frame2;
int i; int i;
_Py_HASHTABLE_READ_KEY(key_size, pkey, traceback1);
_Py_HASHTABLE_ENTRY_READ_KEY(key_size, he, traceback2);
if (traceback1->nframe != traceback2->nframe) if (traceback1->nframe != traceback2->nframe)
return 0; return 0;
@ -312,15 +332,16 @@ tracemalloc_get_frame(PyFrameObject *pyframe, frame_t *frame)
} }
/* intern the filename */ /* intern the filename */
entry = _Py_hashtable_get_entry(tracemalloc_filenames, filename); entry = _Py_HASHTABLE_GET_ENTRY(tracemalloc_filenames, filename);
if (entry != NULL) { if (entry != NULL) {
filename = (PyObject *)entry->key; _Py_HASHTABLE_ENTRY_READ_KEY(tracemalloc_filenames->key_size, entry,
filename);
} }
else { else {
/* tracemalloc_filenames is responsible to keep a reference /* tracemalloc_filenames is responsible to keep a reference
to the filename */ to the filename */
Py_INCREF(filename); Py_INCREF(filename);
if (_Py_hashtable_set(tracemalloc_filenames, filename, NULL, 0) < 0) { if (_Py_HASHTABLE_SET_NODATA(tracemalloc_filenames, filename) < 0) {
Py_DECREF(filename); Py_DECREF(filename);
#ifdef TRACE_DEBUG #ifdef TRACE_DEBUG
tracemalloc_error("failed to intern the filename"); tracemalloc_error("failed to intern the filename");
@ -403,9 +424,10 @@ traceback_new(void)
traceback->hash = traceback_hash(traceback); traceback->hash = traceback_hash(traceback);
/* intern the traceback */ /* intern the traceback */
entry = _Py_hashtable_get_entry(tracemalloc_tracebacks, traceback); entry = _Py_HASHTABLE_GET_ENTRY(tracemalloc_tracebacks, traceback);
if (entry != NULL) { if (entry != NULL) {
traceback = (traceback_t *)entry->key; _Py_HASHTABLE_ENTRY_READ_KEY(tracemalloc_tracebacks->key_size, entry,
traceback);
} }
else { else {
traceback_t *copy; traceback_t *copy;
@ -422,7 +444,7 @@ traceback_new(void)
} }
memcpy(copy, traceback, traceback_size); memcpy(copy, traceback, traceback_size);
if (_Py_hashtable_set(tracemalloc_tracebacks, copy, NULL, 0) < 0) { if (_Py_HASHTABLE_SET_NODATA(tracemalloc_tracebacks, copy) < 0) {
raw_free(copy); raw_free(copy);
#ifdef TRACE_DEBUG #ifdef TRACE_DEBUG
tracemalloc_error("failed to intern the traceback: putdata failed"); tracemalloc_error("failed to intern the traceback: putdata failed");
@ -464,7 +486,7 @@ tracemalloc_remove_trace(void *ptr)
{ {
trace_t trace; trace_t trace;
if (_Py_hashtable_pop(tracemalloc_traces, ptr, &trace, sizeof(trace))) { if (_Py_HASHTABLE_POP(tracemalloc_traces, ptr, trace)) {
assert(tracemalloc_traced_memory >= trace.size); assert(tracemalloc_traced_memory >= trace.size);
tracemalloc_traced_memory -= trace.size; tracemalloc_traced_memory -= trace.size;
} }
@ -714,17 +736,23 @@ tracemalloc_raw_realloc(void *ctx, void *ptr, size_t new_size)
#endif /* TRACE_RAW_MALLOC */ #endif /* TRACE_RAW_MALLOC */
static int static int
tracemalloc_clear_filename(_Py_hashtable_entry_t *entry, void *user_data) tracemalloc_clear_filename(_Py_hashtable_t *ht, _Py_hashtable_entry_t *entry,
void *user_data)
{ {
PyObject *filename = (PyObject *)entry->key; PyObject *filename;
_Py_HASHTABLE_ENTRY_READ_KEY(ht->key_size, entry, filename);
Py_DECREF(filename); Py_DECREF(filename);
return 0; return 0;
} }
static int static int
traceback_free_traceback(_Py_hashtable_entry_t *entry, void *user_data) traceback_free_traceback(_Py_hashtable_t *ht, _Py_hashtable_entry_t *entry,
void *user_data)
{ {
traceback_t *traceback = (traceback_t *)entry->key; traceback_t *traceback;
_Py_HASHTABLE_ENTRY_READ_KEY(ht->key_size, entry, traceback);
raw_free(traceback); raw_free(traceback);
return 0; return 0;
} }
@ -791,21 +819,20 @@ tracemalloc_init(void)
} }
#endif #endif
tracemalloc_filenames = hashtable_new(0, tracemalloc_filenames = hashtable_new(sizeof(PyObject *), 0,
(_Py_hashtable_hash_func)PyObject_Hash, hashtable_hash_pyobject,
hashtable_compare_unicode); hashtable_compare_unicode);
tracemalloc_tracebacks = hashtable_new(0, tracemalloc_tracebacks = hashtable_new(sizeof(traceback_t *), 0,
(_Py_hashtable_hash_func)hashtable_hash_traceback, hashtable_hash_traceback,
(_Py_hashtable_compare_func)hashtable_compare_traceback); hashtable_compare_traceback);
tracemalloc_traces = hashtable_new(sizeof(trace_t), tracemalloc_traces = hashtable_new(sizeof(void*), sizeof(trace_t),
_Py_hashtable_hash_ptr, _Py_hashtable_hash_ptr,
_Py_hashtable_compare_direct); _Py_hashtable_compare_direct);
if (tracemalloc_filenames == NULL || tracemalloc_tracebacks == NULL if (tracemalloc_filenames == NULL || tracemalloc_tracebacks == NULL
|| tracemalloc_traces == NULL) || tracemalloc_traces == NULL) {
{
PyErr_NoMemory(); PyErr_NoMemory();
return -1; return -1;
} }
@ -840,9 +867,9 @@ tracemalloc_deinit(void)
tracemalloc_stop(); tracemalloc_stop();
/* destroy hash tables */ /* destroy hash tables */
_Py_hashtable_destroy(tracemalloc_traces);
_Py_hashtable_destroy(tracemalloc_tracebacks); _Py_hashtable_destroy(tracemalloc_tracebacks);
_Py_hashtable_destroy(tracemalloc_filenames); _Py_hashtable_destroy(tracemalloc_filenames);
_Py_hashtable_destroy(tracemalloc_traces);
#if defined(WITH_THREAD) && defined(TRACE_RAW_MALLOC) #if defined(WITH_THREAD) && defined(TRACE_RAW_MALLOC)
if (tables_lock != NULL) { if (tables_lock != NULL) {
@ -935,8 +962,9 @@ tracemalloc_stop(void)
PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &allocators.mem); PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &allocators.mem);
PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &allocators.obj); PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &allocators.obj);
/* release memory */
tracemalloc_clear_traces(); tracemalloc_clear_traces();
/* release memory */
raw_free(tracemalloc_traceback); raw_free(tracemalloc_traceback);
tracemalloc_traceback = NULL; tracemalloc_traceback = NULL;
} }
@ -1065,14 +1093,15 @@ typedef struct {
} get_traces_t; } get_traces_t;
static int static int
tracemalloc_get_traces_fill(_Py_hashtable_entry_t *entry, void *user_data) tracemalloc_get_traces_fill(_Py_hashtable_t *traces, _Py_hashtable_entry_t *entry,
void *user_data)
{ {
get_traces_t *get_traces = user_data; get_traces_t *get_traces = user_data;
trace_t *trace; trace_t *trace;
PyObject *tracemalloc_obj; PyObject *tracemalloc_obj;
int res; int res;
trace = (trace_t *)_Py_HASHTABLE_ENTRY_DATA(entry); trace = (trace_t *)_Py_HASHTABLE_ENTRY_DATA(traces, entry);
tracemalloc_obj = trace_to_pyobject(trace, get_traces->tracebacks); tracemalloc_obj = trace_to_pyobject(trace, get_traces->tracebacks);
if (tracemalloc_obj == NULL) if (tracemalloc_obj == NULL)
@ -1087,9 +1116,11 @@ tracemalloc_get_traces_fill(_Py_hashtable_entry_t *entry, void *user_data)
} }
static int static int
tracemalloc_pyobject_decref_cb(_Py_hashtable_entry_t *entry, void *user_data) tracemalloc_pyobject_decref_cb(_Py_hashtable_t *tracebacks,
_Py_hashtable_entry_t *entry,
void *user_data)
{ {
PyObject *obj = (PyObject *)_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry); PyObject *obj = (PyObject *)_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(tracebacks, entry);
Py_DECREF(obj); Py_DECREF(obj);
return 0; return 0;
} }
@ -1120,7 +1151,7 @@ py_tracemalloc_get_traces(PyObject *self, PyObject *obj)
/* the traceback hash table is used temporarily to intern traceback tuple /* the traceback hash table is used temporarily to intern traceback tuple
of (filename, lineno) tuples */ of (filename, lineno) tuples */
get_traces.tracebacks = hashtable_new(sizeof(PyObject *), get_traces.tracebacks = hashtable_new(sizeof(traceback_t *), sizeof(PyObject *),
_Py_hashtable_hash_ptr, _Py_hashtable_hash_ptr,
_Py_hashtable_compare_direct); _Py_hashtable_compare_direct);
if (get_traces.tracebacks == NULL) { if (get_traces.tracebacks == NULL) {
@ -1152,7 +1183,7 @@ error:
finally: finally:
if (get_traces.tracebacks != NULL) { if (get_traces.tracebacks != NULL) {
_Py_hashtable_foreach(get_traces.tracebacks, _Py_hashtable_foreach(get_traces.tracebacks,
tracemalloc_pyobject_decref_cb, NULL); tracemalloc_pyobject_decref_cb, NULL);
_Py_hashtable_destroy(get_traces.tracebacks); _Py_hashtable_destroy(get_traces.tracebacks);
} }
if (get_traces.traces != NULL) if (get_traces.traces != NULL)

View File

@ -1,5 +1,5 @@
/* The implementation of the hash table (_Py_hashtable_t) is based on the cfuhash /* The implementation of the hash table (_Py_hashtable_t) is based on the
project: cfuhash project:
http://sourceforge.net/projects/libcfu/ http://sourceforge.net/projects/libcfu/
Copyright of cfuhash: Copyright of cfuhash:
@ -59,7 +59,7 @@
#define ENTRY_NEXT(ENTRY) \ #define ENTRY_NEXT(ENTRY) \
((_Py_hashtable_entry_t *)_Py_SLIST_ITEM_NEXT(ENTRY)) ((_Py_hashtable_entry_t *)_Py_SLIST_ITEM_NEXT(ENTRY))
#define HASHTABLE_ITEM_SIZE(HT) \ #define HASHTABLE_ITEM_SIZE(HT) \
(sizeof(_Py_hashtable_entry_t) + (HT)->data_size) (sizeof(_Py_hashtable_entry_t) + (HT)->key_size + (HT)->data_size)
/* Forward declaration */ /* Forward declaration */
static void hashtable_rehash(_Py_hashtable_t *ht); static void hashtable_rehash(_Py_hashtable_t *ht);
@ -70,6 +70,7 @@ _Py_slist_init(_Py_slist_t *list)
list->head = NULL; list->head = NULL;
} }
static void static void
_Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item) _Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item)
{ {
@ -77,6 +78,7 @@ _Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item)
list->head = item; list->head = item;
} }
static void static void
_Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous, _Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous,
_Py_slist_item_t *item) _Py_slist_item_t *item)
@ -87,24 +89,26 @@ _Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous,
list->head = item->next; list->head = item->next;
} }
Py_uhash_t
_Py_hashtable_hash_int(const void *key)
{
return (Py_uhash_t)key;
}
Py_uhash_t Py_uhash_t
_Py_hashtable_hash_ptr(const void *key) _Py_hashtable_hash_ptr(size_t key_size, const void *pkey)
{ {
void *key;
_Py_HASHTABLE_READ_KEY(key_size, pkey, key);
return (Py_uhash_t)_Py_HashPointer((void *)key); return (Py_uhash_t)_Py_HashPointer((void *)key);
} }
int int
_Py_hashtable_compare_direct(const void *key, const _Py_hashtable_entry_t *entry) _Py_hashtable_compare_direct(size_t key_size, const void *pkey,
const _Py_hashtable_entry_t *entry)
{ {
return entry->key == key; const void *pkey2 = _Py_HASHTABLE_ENTRY_KEY(entry);
return (memcmp(pkey, pkey2, key_size) == 0);
} }
/* makes sure the real size of the buckets array is a power of 2 */ /* makes sure the real size of the buckets array is a power of 2 */
static size_t static size_t
round_size(size_t s) round_size(size_t s)
@ -118,8 +122,10 @@ round_size(size_t s)
return i; return i;
} }
_Py_hashtable_t * _Py_hashtable_t *
_Py_hashtable_new_full(size_t data_size, size_t init_size, _Py_hashtable_new_full(size_t key_size, size_t data_size,
size_t init_size,
_Py_hashtable_hash_func hash_func, _Py_hashtable_hash_func hash_func,
_Py_hashtable_compare_func compare_func, _Py_hashtable_compare_func compare_func,
_Py_hashtable_copy_data_func copy_data_func, _Py_hashtable_copy_data_func copy_data_func,
@ -144,6 +150,7 @@ _Py_hashtable_new_full(size_t data_size, size_t init_size,
ht->num_buckets = round_size(init_size); ht->num_buckets = round_size(init_size);
ht->entries = 0; ht->entries = 0;
ht->key_size = key_size;
ht->data_size = data_size; ht->data_size = data_size;
buckets_size = ht->num_buckets * sizeof(ht->buckets[0]); buckets_size = ht->num_buckets * sizeof(ht->buckets[0]);
@ -163,16 +170,19 @@ _Py_hashtable_new_full(size_t data_size, size_t init_size,
return ht; return ht;
} }
_Py_hashtable_t * _Py_hashtable_t *
_Py_hashtable_new(size_t data_size, _Py_hashtable_new(size_t key_size, size_t data_size,
_Py_hashtable_hash_func hash_func, _Py_hashtable_hash_func hash_func,
_Py_hashtable_compare_func compare_func) _Py_hashtable_compare_func compare_func)
{ {
return _Py_hashtable_new_full(data_size, HASHTABLE_MIN_SIZE, return _Py_hashtable_new_full(key_size, data_size,
HASHTABLE_MIN_SIZE,
hash_func, compare_func, hash_func, compare_func,
NULL, NULL, NULL, NULL); NULL, NULL, NULL, NULL);
} }
size_t size_t
_Py_hashtable_size(_Py_hashtable_t *ht) _Py_hashtable_size(_Py_hashtable_t *ht)
{ {
@ -195,7 +205,7 @@ _Py_hashtable_size(_Py_hashtable_t *ht)
for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) { for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) {
void *data; void *data;
data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry); data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(ht, entry);
size += ht->get_data_size_func(data); size += ht->get_data_size_func(data);
} }
} }
@ -203,6 +213,7 @@ _Py_hashtable_size(_Py_hashtable_t *ht)
return size; return size;
} }
#ifdef Py_DEBUG #ifdef Py_DEBUG
void void
_Py_hashtable_print_stats(_Py_hashtable_t *ht) _Py_hashtable_print_stats(_Py_hashtable_t *ht)
@ -243,38 +254,47 @@ _Py_hashtable_print_stats(_Py_hashtable_t *ht)
} }
#endif #endif
/* Get an entry. Return NULL if the key does not exist. */
_Py_hashtable_entry_t * _Py_hashtable_entry_t *
_Py_hashtable_get_entry(_Py_hashtable_t *ht, const void *key) _Py_hashtable_get_entry(_Py_hashtable_t *ht,
size_t key_size, const void *pkey)
{ {
Py_uhash_t key_hash; Py_uhash_t key_hash;
size_t index; size_t index;
_Py_hashtable_entry_t *entry; _Py_hashtable_entry_t *entry;
key_hash = ht->hash_func(key); assert(key_size == ht->key_size);
key_hash = ht->hash_func(key_size, pkey);
index = key_hash & (ht->num_buckets - 1); index = key_hash & (ht->num_buckets - 1);
for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) { for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) {
if (entry->key_hash == key_hash && ht->compare_func(key, entry)) if (entry->key_hash == key_hash
&& ht->compare_func(key_size, pkey, entry))
break; break;
} }
return entry; return entry;
} }
static int static int
_hashtable_pop_entry(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size) _Py_hashtable_pop_entry(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
void *data, size_t data_size)
{ {
Py_uhash_t key_hash; Py_uhash_t key_hash;
size_t index; size_t index;
_Py_hashtable_entry_t *entry, *previous; _Py_hashtable_entry_t *entry, *previous;
key_hash = ht->hash_func(key); assert(key_size == ht->key_size);
key_hash = ht->hash_func(key_size, pkey);
index = key_hash & (ht->num_buckets - 1); index = key_hash & (ht->num_buckets - 1);
previous = NULL; previous = NULL;
for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) { for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) {
if (entry->key_hash == key_hash && ht->compare_func(key, entry)) if (entry->key_hash == key_hash
&& ht->compare_func(key_size, pkey, entry))
break; break;
previous = entry; previous = entry;
} }
@ -287,7 +307,7 @@ _hashtable_pop_entry(_Py_hashtable_t *ht, const void *key, void *data, size_t da
ht->entries--; ht->entries--;
if (data != NULL) if (data != NULL)
_Py_HASHTABLE_ENTRY_READ_DATA(ht, data, data_size, entry); _Py_HASHTABLE_ENTRY_READ_DATA(ht, entry, data_size, data);
ht->alloc.free(entry); ht->alloc.free(entry);
if ((float)ht->entries / (float)ht->num_buckets < HASHTABLE_LOW) if ((float)ht->entries / (float)ht->num_buckets < HASHTABLE_LOW)
@ -295,26 +315,27 @@ _hashtable_pop_entry(_Py_hashtable_t *ht, const void *key, void *data, size_t da
return 1; return 1;
} }
/* Add a new entry to the hash. The key must not be present in the hash table.
Return 0 on success, -1 on memory error. */
int int
_Py_hashtable_set(_Py_hashtable_t *ht, const void *key, _Py_hashtable_set(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
void *data, size_t data_size) size_t data_size, void *data)
{ {
Py_uhash_t key_hash; Py_uhash_t key_hash;
size_t index; size_t index;
_Py_hashtable_entry_t *entry; _Py_hashtable_entry_t *entry;
assert(key_size == ht->key_size);
assert(data != NULL || data_size == 0); assert(data != NULL || data_size == 0);
#ifndef NDEBUG #ifndef NDEBUG
/* Don't write the assertion on a single line because it is interesting /* Don't write the assertion on a single line because it is interesting
to know the duplicated entry if the assertion failed. The entry can to know the duplicated entry if the assertion failed. The entry can
be read using a debugger. */ be read using a debugger. */
entry = _Py_hashtable_get_entry(ht, key); entry = _Py_hashtable_get_entry(ht, key_size, pkey);
assert(entry == NULL); assert(entry == NULL);
#endif #endif
key_hash = ht->hash_func(key); key_hash = ht->hash_func(key_size, pkey);
index = key_hash & (ht->num_buckets - 1); index = key_hash & (ht->num_buckets - 1);
entry = ht->alloc.malloc(HASHTABLE_ITEM_SIZE(ht)); entry = ht->alloc.malloc(HASHTABLE_ITEM_SIZE(ht));
@ -323,11 +344,11 @@ _Py_hashtable_set(_Py_hashtable_t *ht, const void *key,
return -1; return -1;
} }
entry->key = (void *)key;
entry->key_hash = key_hash; entry->key_hash = key_hash;
memcpy((void *)_Py_HASHTABLE_ENTRY_KEY(entry), pkey, key_size);
assert(data_size == ht->data_size); assert(data_size == ht->data_size);
memcpy(_Py_HASHTABLE_ENTRY_DATA(entry), data, data_size); memcpy(_Py_HASHTABLE_ENTRY_DATA(ht, entry), data, data_size);
_Py_slist_prepend(&ht->buckets[index], (_Py_slist_item_t*)entry); _Py_slist_prepend(&ht->buckets[index], (_Py_slist_item_t*)entry);
ht->entries++; ht->entries++;
@ -337,48 +358,48 @@ _Py_hashtable_set(_Py_hashtable_t *ht, const void *key,
return 0; return 0;
} }
/* Get data from an entry. Copy entry data into data and return 1 if the entry
exists, return 0 if the entry does not exist. */
int int
_Py_hashtable_get(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size) _Py_hashtable_get(_Py_hashtable_t *ht, size_t key_size,const void *pkey,
size_t data_size, void *data)
{ {
_Py_hashtable_entry_t *entry; _Py_hashtable_entry_t *entry;
assert(data != NULL); assert(data != NULL);
entry = _Py_hashtable_get_entry(ht, key); entry = _Py_hashtable_get_entry(ht, key_size, pkey);
if (entry == NULL) if (entry == NULL)
return 0; return 0;
_Py_HASHTABLE_ENTRY_READ_DATA(ht, data, data_size, entry); _Py_HASHTABLE_ENTRY_READ_DATA(ht, entry, data_size, data);
return 1; return 1;
} }
int int
_Py_hashtable_pop(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size) _Py_hashtable_pop(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
size_t data_size, void *data)
{ {
assert(data != NULL); assert(data != NULL);
assert(ht->free_data_func == NULL); assert(ht->free_data_func == NULL);
return _hashtable_pop_entry(ht, key, data, data_size); return _Py_hashtable_pop_entry(ht, key_size, pkey, data, data_size);
} }
/* Delete an entry. The entry must exist. */
void void
_Py_hashtable_delete(_Py_hashtable_t *ht, const void *key) _Py_hashtable_delete(_Py_hashtable_t *ht, size_t key_size, const void *pkey)
{ {
#ifndef NDEBUG #ifndef NDEBUG
int found = _hashtable_pop_entry(ht, key, NULL, 0); int found = _Py_hashtable_pop_entry(ht, key_size, pkey, NULL, 0);
assert(found); assert(found);
#else #else
(void)_hashtable_pop_entry(ht, key, NULL, 0); (void)_Py_hashtable_pop_entry(ht, key_size, pkey, NULL, 0);
#endif #endif
} }
/* Prototype for a pointer to a function to be called foreach
key/value pair in the hash by hashtable_foreach(). Iteration
stops if a non-zero value is returned. */
int int
_Py_hashtable_foreach(_Py_hashtable_t *ht, _Py_hashtable_foreach(_Py_hashtable_t *ht,
int (*func) (_Py_hashtable_entry_t *entry, void *arg), _Py_hashtable_foreach_func func,
void *arg) void *arg)
{ {
_Py_hashtable_entry_t *entry; _Py_hashtable_entry_t *entry;
@ -386,7 +407,7 @@ _Py_hashtable_foreach(_Py_hashtable_t *ht,
for (hv = 0; hv < ht->num_buckets; hv++) { for (hv = 0; hv < ht->num_buckets; hv++) {
for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) { for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) {
int res = func(entry, arg); int res = func(ht, entry, arg);
if (res) if (res)
return res; return res;
} }
@ -394,9 +415,11 @@ _Py_hashtable_foreach(_Py_hashtable_t *ht,
return 0; return 0;
} }
static void static void
hashtable_rehash(_Py_hashtable_t *ht) hashtable_rehash(_Py_hashtable_t *ht)
{ {
const size_t key_size = ht->key_size;
size_t buckets_size, new_size, bucket; size_t buckets_size, new_size, bucket;
_Py_slist_t *old_buckets = NULL; _Py_slist_t *old_buckets = NULL;
size_t old_num_buckets; size_t old_num_buckets;
@ -425,7 +448,8 @@ hashtable_rehash(_Py_hashtable_t *ht)
for (entry = BUCKETS_HEAD(old_buckets[bucket]); entry != NULL; entry = next) { for (entry = BUCKETS_HEAD(old_buckets[bucket]); entry != NULL; entry = next) {
size_t entry_index; size_t entry_index;
assert(ht->hash_func(entry->key) == entry->key_hash);
assert(ht->hash_func(key_size, _Py_HASHTABLE_ENTRY_KEY(entry)) == entry->key_hash);
next = ENTRY_NEXT(entry); next = ENTRY_NEXT(entry);
entry_index = entry->key_hash & (new_size - 1); entry_index = entry->key_hash & (new_size - 1);
@ -436,6 +460,7 @@ hashtable_rehash(_Py_hashtable_t *ht)
ht->alloc.free(old_buckets); ht->alloc.free(old_buckets);
} }
void void
_Py_hashtable_clear(_Py_hashtable_t *ht) _Py_hashtable_clear(_Py_hashtable_t *ht)
{ {
@ -446,7 +471,7 @@ _Py_hashtable_clear(_Py_hashtable_t *ht)
for (entry = TABLE_HEAD(ht, i); entry != NULL; entry = next) { for (entry = TABLE_HEAD(ht, i); entry != NULL; entry = next) {
next = ENTRY_NEXT(entry); next = ENTRY_NEXT(entry);
if (ht->free_data_func) if (ht->free_data_func)
ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry)); ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(ht, entry));
ht->alloc.free(entry); ht->alloc.free(entry);
} }
_Py_slist_init(&ht->buckets[i]); _Py_slist_init(&ht->buckets[i]);
@ -455,6 +480,7 @@ _Py_hashtable_clear(_Py_hashtable_t *ht)
hashtable_rehash(ht); hashtable_rehash(ht);
} }
void void
_Py_hashtable_destroy(_Py_hashtable_t *ht) _Py_hashtable_destroy(_Py_hashtable_t *ht)
{ {
@ -465,7 +491,7 @@ _Py_hashtable_destroy(_Py_hashtable_t *ht)
while (entry) { while (entry) {
_Py_slist_item_t *entry_next = entry->next; _Py_slist_item_t *entry_next = entry->next;
if (ht->free_data_func) if (ht->free_data_func)
ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry)); ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(ht, entry));
ht->alloc.free(entry); ht->alloc.free(entry);
entry = entry_next; entry = entry_next;
} }
@ -475,17 +501,20 @@ _Py_hashtable_destroy(_Py_hashtable_t *ht)
ht->alloc.free(ht); ht->alloc.free(ht);
} }
/* Return a copy of the hash table */
_Py_hashtable_t * _Py_hashtable_t *
_Py_hashtable_copy(_Py_hashtable_t *src) _Py_hashtable_copy(_Py_hashtable_t *src)
{ {
const size_t key_size = src->key_size;
const size_t data_size = src->data_size;
_Py_hashtable_t *dst; _Py_hashtable_t *dst;
_Py_hashtable_entry_t *entry; _Py_hashtable_entry_t *entry;
size_t bucket; size_t bucket;
int err; int err;
void *data, *new_data; void *data, *new_data;
dst = _Py_hashtable_new_full(src->data_size, src->num_buckets, dst = _Py_hashtable_new_full(key_size, data_size,
src->num_buckets,
src->hash_func, src->compare_func, src->hash_func, src->compare_func,
src->copy_data_func, src->free_data_func, src->copy_data_func, src->free_data_func,
src->get_data_size_func, &src->alloc); src->get_data_size_func, &src->alloc);
@ -496,17 +525,20 @@ _Py_hashtable_copy(_Py_hashtable_t *src)
entry = TABLE_HEAD(src, bucket); entry = TABLE_HEAD(src, bucket);
for (; entry; entry = ENTRY_NEXT(entry)) { for (; entry; entry = ENTRY_NEXT(entry)) {
if (src->copy_data_func) { if (src->copy_data_func) {
data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry); data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(src, entry);
new_data = src->copy_data_func(data); new_data = src->copy_data_func(data);
if (new_data != NULL) if (new_data != NULL)
err = _Py_hashtable_set(dst, entry->key, err = _Py_hashtable_set(dst, key_size,
&new_data, src->data_size); _Py_HASHTABLE_ENTRY_KEY(entry),
data_size, &new_data);
else else
err = 1; err = 1;
} }
else { else {
data = _Py_HASHTABLE_ENTRY_DATA(entry); data = _Py_HASHTABLE_ENTRY_DATA(src, entry);
err = _Py_hashtable_set(dst, entry->key, data, src->data_size); err = _Py_hashtable_set(dst, key_size,
_Py_HASHTABLE_ENTRY_KEY(entry),
data_size, data);
} }
if (err) { if (err) {
_Py_hashtable_destroy(dst); _Py_hashtable_destroy(dst);
@ -516,4 +548,3 @@ _Py_hashtable_copy(_Py_hashtable_t *src)
} }
return dst; return dst;
} }

View File

@ -1,9 +1,10 @@
#ifndef Py_HASHTABLE_H #ifndef Py_HASHTABLE_H
#define Py_HASHTABLE_H #define Py_HASHTABLE_H
/* The whole API is private */ /* The whole API is private */
#ifndef Py_LIMITED_API #ifndef Py_LIMITED_API
/* Single linked list */
typedef struct _Py_slist_item_s { typedef struct _Py_slist_item_s {
struct _Py_slist_item_s *next; struct _Py_slist_item_s *next;
} _Py_slist_item_t; } _Py_slist_item_t;
@ -16,30 +17,55 @@ typedef struct {
#define _Py_SLIST_HEAD(SLIST) (((_Py_slist_t *)SLIST)->head) #define _Py_SLIST_HEAD(SLIST) (((_Py_slist_t *)SLIST)->head)
/* _Py_hashtable: table entry */
typedef struct { typedef struct {
/* used by _Py_hashtable_t.buckets to link entries */ /* used by _Py_hashtable_t.buckets to link entries */
_Py_slist_item_t _Py_slist_item; _Py_slist_item_t _Py_slist_item;
const void *key;
Py_uhash_t key_hash; Py_uhash_t key_hash;
/* data follows */ /* key (key_size bytes) and then data (data_size bytes) follows */
} _Py_hashtable_entry_t; } _Py_hashtable_entry_t;
#define _Py_HASHTABLE_ENTRY_DATA(ENTRY) \ #define _Py_HASHTABLE_ENTRY_KEY(ENTRY) \
((char *)(ENTRY) + sizeof(_Py_hashtable_entry_t)) ((const void *)((char *)(ENTRY) + sizeof(_Py_hashtable_entry_t)))
#define _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(ENTRY) \ #define _Py_HASHTABLE_ENTRY_DATA(TABLE, ENTRY) \
(*(void **)_Py_HASHTABLE_ENTRY_DATA(ENTRY)) ((char *)(ENTRY) + sizeof(_Py_hashtable_entry_t) + (TABLE)->key_size)
#define _Py_HASHTABLE_ENTRY_READ_DATA(TABLE, DATA, DATA_SIZE, ENTRY) \ #define _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(TABLE, ENTRY) \
(*(void **)_Py_HASHTABLE_ENTRY_DATA(TABLE, ENTRY))
/* Get a key value from pkey: use memcpy() rather than a pointer dereference
to avoid memory alignment issues. */
#define _Py_HASHTABLE_READ_KEY(KEY_SIZE, PKEY, DST_KEY) \
do { \ do { \
assert((DATA_SIZE) == (TABLE)->data_size); \ assert(sizeof(DST_KEY) == (KEY_SIZE)); \
memcpy(DATA, _Py_HASHTABLE_ENTRY_DATA(ENTRY), DATA_SIZE); \ memcpy(&(DST_KEY), (PKEY), sizeof(DST_KEY)); \
} while (0) } while (0)
typedef Py_uhash_t (*_Py_hashtable_hash_func) (const void *key); #define _Py_HASHTABLE_ENTRY_READ_KEY(KEY_SIZE, ENTRY, KEY) \
typedef int (*_Py_hashtable_compare_func) (const void *key, const _Py_hashtable_entry_t *he); do { \
assert(sizeof(KEY) == (KEY_SIZE)); \
memcpy(&(KEY), _Py_HASHTABLE_ENTRY_KEY(ENTRY), sizeof(KEY)); \
} while (0)
#define _Py_HASHTABLE_ENTRY_READ_DATA(TABLE, ENTRY, DATA_SIZE, DATA) \
do { \
assert((DATA_SIZE) == (TABLE)->data_size); \
memcpy(DATA, _Py_HASHTABLE_ENTRY_DATA(TABLE, ENTRY), DATA_SIZE); \
} while (0)
/* _Py_hashtable: prototypes */
typedef Py_uhash_t (*_Py_hashtable_hash_func) (size_t key_size,
const void *pkey);
typedef int (*_Py_hashtable_compare_func) (size_t key_size,
const void *pkey,
const _Py_hashtable_entry_t *he);
typedef void* (*_Py_hashtable_copy_data_func)(void *data); typedef void* (*_Py_hashtable_copy_data_func)(void *data);
typedef void (*_Py_hashtable_free_data_func)(void *data); typedef void (*_Py_hashtable_free_data_func)(void *data);
typedef size_t (*_Py_hashtable_get_data_size_func)(void *data); typedef size_t (*_Py_hashtable_get_data_size_func)(void *data);
@ -52,10 +78,14 @@ typedef struct {
void (*free) (void *ptr); void (*free) (void *ptr);
} _Py_hashtable_allocator_t; } _Py_hashtable_allocator_t;
/* _Py_hashtable: table */
typedef struct { typedef struct {
size_t num_buckets; size_t num_buckets;
size_t entries; /* Total number of entries in the table. */ size_t entries; /* Total number of entries in the table. */
_Py_slist_t *buckets; _Py_slist_t *buckets;
size_t key_size;
size_t data_size; size_t data_size;
_Py_hashtable_hash_func hash_func; _Py_hashtable_hash_func hash_func;
@ -66,16 +96,25 @@ typedef struct {
_Py_hashtable_allocator_t alloc; _Py_hashtable_allocator_t alloc;
} _Py_hashtable_t; } _Py_hashtable_t;
/* hash and compare functions for integers and pointers */ /* hash a pointer (void*) */
PyAPI_FUNC(Py_uhash_t) _Py_hashtable_hash_ptr(const void *key); PyAPI_FUNC(Py_uhash_t) _Py_hashtable_hash_ptr(
PyAPI_FUNC(Py_uhash_t) _Py_hashtable_hash_int(const void *key); size_t key_size,
PyAPI_FUNC(int) _Py_hashtable_compare_direct(const void *key, const _Py_hashtable_entry_t *entry); const void *pkey);
/* comparison using memcmp() */
PyAPI_FUNC(int) _Py_hashtable_compare_direct(
size_t key_size,
const void *pkey,
const _Py_hashtable_entry_t *entry);
PyAPI_FUNC(_Py_hashtable_t *) _Py_hashtable_new( PyAPI_FUNC(_Py_hashtable_t *) _Py_hashtable_new(
size_t key_size,
size_t data_size, size_t data_size,
_Py_hashtable_hash_func hash_func, _Py_hashtable_hash_func hash_func,
_Py_hashtable_compare_func compare_func); _Py_hashtable_compare_func compare_func);
PyAPI_FUNC(_Py_hashtable_t *) _Py_hashtable_new_full( PyAPI_FUNC(_Py_hashtable_t *) _Py_hashtable_new_full(
size_t key_size,
size_t data_size, size_t data_size,
size_t init_size, size_t init_size,
_Py_hashtable_hash_func hash_func, _Py_hashtable_hash_func hash_func,
@ -84,45 +123,95 @@ PyAPI_FUNC(_Py_hashtable_t *) _Py_hashtable_new_full(
_Py_hashtable_free_data_func free_data_func, _Py_hashtable_free_data_func free_data_func,
_Py_hashtable_get_data_size_func get_data_size_func, _Py_hashtable_get_data_size_func get_data_size_func,
_Py_hashtable_allocator_t *allocator); _Py_hashtable_allocator_t *allocator);
PyAPI_FUNC(_Py_hashtable_t *) _Py_hashtable_copy(_Py_hashtable_t *src);
PyAPI_FUNC(void) _Py_hashtable_clear(_Py_hashtable_t *ht);
PyAPI_FUNC(void) _Py_hashtable_destroy(_Py_hashtable_t *ht); PyAPI_FUNC(void) _Py_hashtable_destroy(_Py_hashtable_t *ht);
typedef int (*_Py_hashtable_foreach_func) (_Py_hashtable_entry_t *entry, void *arg); /* Return a copy of the hash table */
PyAPI_FUNC(_Py_hashtable_t *) _Py_hashtable_copy(_Py_hashtable_t *src);
PyAPI_FUNC(void) _Py_hashtable_clear(_Py_hashtable_t *ht);
typedef int (*_Py_hashtable_foreach_func) (_Py_hashtable_t *ht,
_Py_hashtable_entry_t *entry,
void *arg);
/* Call func() on each entry of the hashtable.
Iteration stops as soon as func() returns a non-zero value; that value is then
returned to the caller. Otherwise, the function returns 0. */
PyAPI_FUNC(int) _Py_hashtable_foreach( PyAPI_FUNC(int) _Py_hashtable_foreach(
_Py_hashtable_t *ht, _Py_hashtable_t *ht,
_Py_hashtable_foreach_func func, void *arg); _Py_hashtable_foreach_func func,
void *arg);
PyAPI_FUNC(size_t) _Py_hashtable_size(_Py_hashtable_t *ht); PyAPI_FUNC(size_t) _Py_hashtable_size(_Py_hashtable_t *ht);
PyAPI_FUNC(_Py_hashtable_entry_t*) _Py_hashtable_get_entry( /* Add a new entry to the hash. The key must not be present in the hash table.
_Py_hashtable_t *ht, Return 0 on success, -1 on memory error.
const void *key);
Don't call this function directly;
use the _Py_HASHTABLE_SET() and _Py_HASHTABLE_SET_NODATA() macros instead. */
PyAPI_FUNC(int) _Py_hashtable_set( PyAPI_FUNC(int) _Py_hashtable_set(
_Py_hashtable_t *ht, _Py_hashtable_t *ht,
const void *key, size_t key_size,
void *data, const void *pkey,
size_t data_size); size_t data_size,
PyAPI_FUNC(int) _Py_hashtable_get( void *data);
_Py_hashtable_t *ht,
const void *key,
void *data,
size_t data_size);
PyAPI_FUNC(int) _Py_hashtable_pop(
_Py_hashtable_t *ht,
const void *key,
void *data,
size_t data_size);
PyAPI_FUNC(void) _Py_hashtable_delete(
_Py_hashtable_t *ht,
const void *key);
#define _Py_HASHTABLE_SET(TABLE, KEY, DATA) \ #define _Py_HASHTABLE_SET(TABLE, KEY, DATA) \
_Py_hashtable_set(TABLE, KEY, &(DATA), sizeof(DATA)) _Py_hashtable_set(TABLE, sizeof(KEY), &KEY, sizeof(DATA), &(DATA))
#define _Py_HASHTABLE_SET_NODATA(TABLE, KEY) \
_Py_hashtable_set(TABLE, sizeof(KEY), &KEY, 0, NULL)
/* Get an entry.
Return NULL if the key does not exist.
Don't call this function directly; use the _Py_HASHTABLE_GET_ENTRY()
macro instead. */
PyAPI_FUNC(_Py_hashtable_entry_t*) _Py_hashtable_get_entry(
_Py_hashtable_t *ht,
size_t key_size,
const void *pkey);
#define _Py_HASHTABLE_GET_ENTRY(TABLE, KEY) \
_Py_hashtable_get_entry(TABLE, sizeof(KEY), &(KEY))
/* Get data from an entry. Copy entry data into data and return 1 if the entry
exists, return 0 if the entry does not exist.
Don't call this function directly; use the _Py_HASHTABLE_GET() macro instead. */
PyAPI_FUNC(int) _Py_hashtable_get(
_Py_hashtable_t *ht,
size_t key_size,
const void *pkey,
size_t data_size,
void *data);
#define _Py_HASHTABLE_GET(TABLE, KEY, DATA) \ #define _Py_HASHTABLE_GET(TABLE, KEY, DATA) \
_Py_hashtable_get(TABLE, KEY, &(DATA), sizeof(DATA)) _Py_hashtable_get(TABLE, sizeof(KEY), &(KEY), sizeof(DATA), &(DATA))
/* Don't call this function directly; use the _Py_HASHTABLE_POP() macro instead. */
PyAPI_FUNC(int) _Py_hashtable_pop(
_Py_hashtable_t *ht,
size_t key_size,
const void *pkey,
size_t data_size,
void *data);
#define _Py_HASHTABLE_POP(TABLE, KEY, DATA) \
_Py_hashtable_pop(TABLE, sizeof(KEY), &(KEY), sizeof(DATA), &(DATA))
/* Delete an entry.
WARNING: The entry must exist. */
PyAPI_FUNC(void) _Py_hashtable_delete(
_Py_hashtable_t *ht,
size_t key_size,
const void *pkey);
#endif /* Py_LIMITED_API */ #endif /* Py_LIMITED_API */
#endif #endif

View File

@ -263,10 +263,10 @@ w_ref(PyObject *v, char *flag, WFILE *p)
if (Py_REFCNT(v) == 1) if (Py_REFCNT(v) == 1)
return 0; return 0;
entry = _Py_hashtable_get_entry(p->hashtable, v); entry = _Py_HASHTABLE_GET_ENTRY(p->hashtable, v);
if (entry != NULL) { if (entry != NULL) {
/* write the reference index to the stream */ /* write the reference index to the stream */
_Py_HASHTABLE_ENTRY_READ_DATA(p->hashtable, &w, sizeof(w), entry); _Py_HASHTABLE_ENTRY_READ_DATA(p->hashtable, entry, sizeof(w), &w);
/* we don't store "long" indices in the dict */ /* we don't store "long" indices in the dict */
assert(0 <= w && w <= 0x7fffffff); assert(0 <= w && w <= 0x7fffffff);
w_byte(TYPE_REF, p); w_byte(TYPE_REF, p);
@ -571,7 +571,8 @@ static int
w_init_refs(WFILE *wf, int version) w_init_refs(WFILE *wf, int version)
{ {
if (version >= 3) { if (version >= 3) {
wf->hashtable = _Py_hashtable_new(sizeof(int), _Py_hashtable_hash_ptr, wf->hashtable = _Py_hashtable_new(sizeof(PyObject *), sizeof(int),
_Py_hashtable_hash_ptr,
_Py_hashtable_compare_direct); _Py_hashtable_compare_direct);
if (wf->hashtable == NULL) { if (wf->hashtable == NULL) {
PyErr_NoMemory(); PyErr_NoMemory();
@ -582,9 +583,13 @@ w_init_refs(WFILE *wf, int version)
} }
static int static int
w_decref_entry(_Py_hashtable_entry_t *entry, void *Py_UNUSED(data)) w_decref_entry(_Py_hashtable_t *ht, _Py_hashtable_entry_t *entry,
void *Py_UNUSED(data))
{ {
Py_XDECREF(entry->key); PyObject *entry_key;
_Py_HASHTABLE_ENTRY_READ_KEY(ht->key_size, entry, entry_key);
Py_XDECREF(entry_key);
return 0; return 0;
} }