From 285cf0a6014af147b82a3446d9e088ad0332720d Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 21 Mar 2016 22:00:58 +0100 Subject: [PATCH] hashtable.h now supports keys of any size Issue #26588: hashtable.h now supports keys of any size, not only sizeof(void*). It allows to support key larger than sizeof(void*), but also to use less memory for key smaller than sizeof(void*). --- Modules/_tracemalloc.c | 107 ++++++++++++++++--------- Modules/hashtable.c | 147 ++++++++++++++++++++-------------- Modules/hashtable.h | 173 +++++++++++++++++++++++++++++++---------- Python/marshal.c | 15 ++-- 4 files changed, 299 insertions(+), 143 deletions(-) diff --git a/Modules/_tracemalloc.c b/Modules/_tracemalloc.c index 5752904d477..6799eb6b642 100644 --- a/Modules/_tracemalloc.c +++ b/Modules/_tracemalloc.c @@ -196,23 +196,38 @@ set_reentrant(int reentrant) } #endif -static int -hashtable_compare_unicode(const void *key, const _Py_hashtable_entry_t *entry) +static Py_uhash_t +hashtable_hash_pyobject(size_t key_size, const void *pkey) { - if (key != NULL && entry->key != NULL) - return (PyUnicode_Compare((PyObject *)key, (PyObject *)entry->key) == 0); + PyObject *obj; + + _Py_HASHTABLE_READ_KEY(key_size, pkey, obj); + return PyObject_Hash(obj); +} + +static int +hashtable_compare_unicode(size_t key_size, const void *pkey, + const _Py_hashtable_entry_t *entry) +{ + PyObject *key, *entry_key; + + _Py_HASHTABLE_READ_KEY(key_size, pkey, key); + _Py_HASHTABLE_ENTRY_READ_KEY(key_size, entry, entry_key); + + if (key != NULL && entry_key != NULL) + return (PyUnicode_Compare(key, entry_key) == 0); else - return key == entry->key; + return key == entry_key; } static _Py_hashtable_allocator_t hashtable_alloc = {malloc, free}; static _Py_hashtable_t * -hashtable_new(size_t data_size, +hashtable_new(size_t key_size, size_t data_size, _Py_hashtable_hash_func hash_func, _Py_hashtable_compare_func compare_func) { - return _Py_hashtable_new_full(data_size, 0, + return _Py_hashtable_new_full(key_size, data_size, 0, hash_func, compare_func, NULL, NULL, NULL, &hashtable_alloc); } @@ -230,20 +245,25 @@ raw_free(void *ptr) } static Py_uhash_t -hashtable_hash_traceback(const void *key) +hashtable_hash_traceback(size_t key_size, const void *pkey) { - const traceback_t *traceback = key; + const traceback_t *traceback; + + _Py_HASHTABLE_READ_KEY(key_size, pkey, traceback); return traceback->hash; } static int -hashtable_compare_traceback(const traceback_t *traceback1, +hashtable_compare_traceback(size_t key_size, const void *pkey, const _Py_hashtable_entry_t *he) { - const traceback_t *traceback2 = he->key; + traceback_t *traceback1, *traceback2; const frame_t *frame1, *frame2; int i; + _Py_HASHTABLE_READ_KEY(key_size, pkey, traceback1); + _Py_HASHTABLE_ENTRY_READ_KEY(key_size, he, traceback2); + if (traceback1->nframe != traceback2->nframe) return 0; @@ -312,15 +332,16 @@ tracemalloc_get_frame(PyFrameObject *pyframe, frame_t *frame) } /* intern the filename */ - entry = _Py_hashtable_get_entry(tracemalloc_filenames, filename); + entry = _Py_HASHTABLE_GET_ENTRY(tracemalloc_filenames, filename); if (entry != NULL) { - filename = (PyObject *)entry->key; + _Py_HASHTABLE_ENTRY_READ_KEY(tracemalloc_filenames->key_size, entry, + filename); } else { /* tracemalloc_filenames is responsible to keep a reference to the filename */ Py_INCREF(filename); - if (_Py_hashtable_set(tracemalloc_filenames, filename, NULL, 0) < 0) { + if (_Py_HASHTABLE_SET_NODATA(tracemalloc_filenames, filename) < 0) { Py_DECREF(filename); #ifdef TRACE_DEBUG tracemalloc_error("failed to intern the filename"); @@ -403,9 +424,10 @@ traceback_new(void) traceback->hash = traceback_hash(traceback); /* intern the traceback */ - entry = _Py_hashtable_get_entry(tracemalloc_tracebacks, traceback); + entry = _Py_HASHTABLE_GET_ENTRY(tracemalloc_tracebacks, traceback); if (entry != NULL) { - traceback = (traceback_t *)entry->key; + _Py_HASHTABLE_ENTRY_READ_KEY(tracemalloc_tracebacks->key_size, entry, + traceback); } else { traceback_t *copy; @@ -422,7 +444,7 @@ traceback_new(void) } memcpy(copy, traceback, traceback_size); - if (_Py_hashtable_set(tracemalloc_tracebacks, copy, NULL, 0) < 0) { + if (_Py_HASHTABLE_SET_NODATA(tracemalloc_tracebacks, copy) < 0) { raw_free(copy); #ifdef TRACE_DEBUG tracemalloc_error("failed to intern the traceback: putdata failed"); @@ -464,7 +486,7 @@ tracemalloc_remove_trace(void *ptr) { trace_t trace; - if (_Py_hashtable_pop(tracemalloc_traces, ptr, &trace, sizeof(trace))) { + if (_Py_HASHTABLE_POP(tracemalloc_traces, ptr, trace)) { assert(tracemalloc_traced_memory >= trace.size); tracemalloc_traced_memory -= trace.size; } @@ -714,17 +736,23 @@ tracemalloc_raw_realloc(void *ctx, void *ptr, size_t new_size) #endif /* TRACE_RAW_MALLOC */ static int -tracemalloc_clear_filename(_Py_hashtable_entry_t *entry, void *user_data) +tracemalloc_clear_filename(_Py_hashtable_t *ht, _Py_hashtable_entry_t *entry, + void *user_data) { - PyObject *filename = (PyObject *)entry->key; + PyObject *filename; + + _Py_HASHTABLE_ENTRY_READ_KEY(ht->key_size, entry, filename); Py_DECREF(filename); return 0; } static int -traceback_free_traceback(_Py_hashtable_entry_t *entry, void *user_data) +traceback_free_traceback(_Py_hashtable_t *ht, _Py_hashtable_entry_t *entry, + void *user_data) { - traceback_t *traceback = (traceback_t *)entry->key; + traceback_t *traceback; + + _Py_HASHTABLE_ENTRY_READ_KEY(ht->key_size, entry, traceback); raw_free(traceback); return 0; } @@ -791,21 +819,20 @@ tracemalloc_init(void) } #endif - tracemalloc_filenames = hashtable_new(0, - (_Py_hashtable_hash_func)PyObject_Hash, + tracemalloc_filenames = hashtable_new(sizeof(PyObject *), 0, + hashtable_hash_pyobject, hashtable_compare_unicode); - tracemalloc_tracebacks = hashtable_new(0, - (_Py_hashtable_hash_func)hashtable_hash_traceback, - (_Py_hashtable_compare_func)hashtable_compare_traceback); + tracemalloc_tracebacks = hashtable_new(sizeof(traceback_t *), 0, + hashtable_hash_traceback, + hashtable_compare_traceback); - tracemalloc_traces = hashtable_new(sizeof(trace_t), + tracemalloc_traces = hashtable_new(sizeof(void*), sizeof(trace_t), _Py_hashtable_hash_ptr, _Py_hashtable_compare_direct); if (tracemalloc_filenames == NULL || tracemalloc_tracebacks == NULL - || tracemalloc_traces == NULL) - { + || tracemalloc_traces == NULL) { PyErr_NoMemory(); return -1; } @@ -840,9 +867,9 @@ tracemalloc_deinit(void) tracemalloc_stop(); /* destroy hash tables */ - _Py_hashtable_destroy(tracemalloc_traces); _Py_hashtable_destroy(tracemalloc_tracebacks); _Py_hashtable_destroy(tracemalloc_filenames); + _Py_hashtable_destroy(tracemalloc_traces); #if defined(WITH_THREAD) && defined(TRACE_RAW_MALLOC) if (tables_lock != NULL) { @@ -935,8 +962,9 @@ tracemalloc_stop(void) PyMem_SetAllocator(PYMEM_DOMAIN_MEM, &allocators.mem); PyMem_SetAllocator(PYMEM_DOMAIN_OBJ, &allocators.obj); - /* release memory */ tracemalloc_clear_traces(); + + /* release memory */ raw_free(tracemalloc_traceback); tracemalloc_traceback = NULL; } @@ -1065,14 +1093,15 @@ typedef struct { } get_traces_t; static int -tracemalloc_get_traces_fill(_Py_hashtable_entry_t *entry, void *user_data) +tracemalloc_get_traces_fill(_Py_hashtable_t *traces, _Py_hashtable_entry_t *entry, + void *user_data) { get_traces_t *get_traces = user_data; trace_t *trace; PyObject *tracemalloc_obj; int res; - trace = (trace_t *)_Py_HASHTABLE_ENTRY_DATA(entry); + trace = (trace_t *)_Py_HASHTABLE_ENTRY_DATA(traces, entry); tracemalloc_obj = trace_to_pyobject(trace, get_traces->tracebacks); if (tracemalloc_obj == NULL) @@ -1087,9 +1116,11 @@ tracemalloc_get_traces_fill(_Py_hashtable_entry_t *entry, void *user_data) } static int -tracemalloc_pyobject_decref_cb(_Py_hashtable_entry_t *entry, void *user_data) +tracemalloc_pyobject_decref_cb(_Py_hashtable_t *tracebacks, + _Py_hashtable_entry_t *entry, + void *user_data) { - PyObject *obj = (PyObject *)_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry); + PyObject *obj = (PyObject *)_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(tracebacks, entry); Py_DECREF(obj); return 0; } @@ -1120,7 +1151,7 @@ py_tracemalloc_get_traces(PyObject *self, PyObject *obj) /* the traceback hash table is used temporarily to intern traceback tuple of (filename, lineno) tuples */ - get_traces.tracebacks = hashtable_new(sizeof(PyObject *), + get_traces.tracebacks = hashtable_new(sizeof(traceback_t *), sizeof(PyObject *), _Py_hashtable_hash_ptr, _Py_hashtable_compare_direct); if (get_traces.tracebacks == NULL) { @@ -1152,7 +1183,7 @@ error: finally: if (get_traces.tracebacks != NULL) { _Py_hashtable_foreach(get_traces.tracebacks, - tracemalloc_pyobject_decref_cb, NULL); + tracemalloc_pyobject_decref_cb, NULL); _Py_hashtable_destroy(get_traces.tracebacks); } if (get_traces.traces != NULL) diff --git a/Modules/hashtable.c b/Modules/hashtable.c index 7de154b70a7..d33f0d744ec 100644 --- a/Modules/hashtable.c +++ b/Modules/hashtable.c @@ -1,5 +1,5 @@ -/* The implementation of the hash table (_Py_hashtable_t) is based on the cfuhash - project: +/* The implementation of the hash table (_Py_hashtable_t) is based on the + cfuhash project: http://sourceforge.net/projects/libcfu/ Copyright of cfuhash: @@ -59,7 +59,7 @@ #define ENTRY_NEXT(ENTRY) \ ((_Py_hashtable_entry_t *)_Py_SLIST_ITEM_NEXT(ENTRY)) #define HASHTABLE_ITEM_SIZE(HT) \ - (sizeof(_Py_hashtable_entry_t) + (HT)->data_size) + (sizeof(_Py_hashtable_entry_t) + (HT)->key_size + (HT)->data_size) /* Forward declaration */ static void hashtable_rehash(_Py_hashtable_t *ht); @@ -70,6 +70,7 @@ _Py_slist_init(_Py_slist_t *list) list->head = NULL; } + static void _Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item) { @@ -77,6 +78,7 @@ _Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item) list->head = item; } + static void _Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous, _Py_slist_item_t *item) @@ -87,24 +89,26 @@ _Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous, list->head = item->next; } -Py_uhash_t -_Py_hashtable_hash_int(const void *key) -{ - return (Py_uhash_t)key; -} Py_uhash_t -_Py_hashtable_hash_ptr(const void *key) +_Py_hashtable_hash_ptr(size_t key_size, const void *pkey) { + void *key; + + _Py_HASHTABLE_READ_KEY(key_size, pkey, key); return (Py_uhash_t)_Py_HashPointer((void *)key); } + int -_Py_hashtable_compare_direct(const void *key, const _Py_hashtable_entry_t *entry) +_Py_hashtable_compare_direct(size_t key_size, const void *pkey, + const _Py_hashtable_entry_t *entry) { - return entry->key == key; + const void *pkey2 = _Py_HASHTABLE_ENTRY_KEY(entry); + return (memcmp(pkey, pkey2, key_size) == 0); } + /* makes sure the real size of the buckets array is a power of 2 */ static size_t round_size(size_t s) @@ -118,8 +122,10 @@ round_size(size_t s) return i; } + _Py_hashtable_t * -_Py_hashtable_new_full(size_t data_size, size_t init_size, +_Py_hashtable_new_full(size_t key_size, size_t data_size, + size_t init_size, _Py_hashtable_hash_func hash_func, _Py_hashtable_compare_func compare_func, _Py_hashtable_copy_data_func copy_data_func, @@ -144,6 +150,7 @@ _Py_hashtable_new_full(size_t data_size, size_t init_size, ht->num_buckets = round_size(init_size); ht->entries = 0; + ht->key_size = key_size; ht->data_size = data_size; buckets_size = ht->num_buckets * sizeof(ht->buckets[0]); @@ -163,16 +170,19 @@ _Py_hashtable_new_full(size_t data_size, size_t init_size, return ht; } + _Py_hashtable_t * -_Py_hashtable_new(size_t data_size, +_Py_hashtable_new(size_t key_size, size_t data_size, _Py_hashtable_hash_func hash_func, _Py_hashtable_compare_func compare_func) { - return _Py_hashtable_new_full(data_size, HASHTABLE_MIN_SIZE, + return _Py_hashtable_new_full(key_size, data_size, + HASHTABLE_MIN_SIZE, hash_func, compare_func, NULL, NULL, NULL, NULL); } + size_t _Py_hashtable_size(_Py_hashtable_t *ht) { @@ -195,7 +205,7 @@ _Py_hashtable_size(_Py_hashtable_t *ht) for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) { void *data; - data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry); + data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(ht, entry); size += ht->get_data_size_func(data); } } @@ -203,6 +213,7 @@ _Py_hashtable_size(_Py_hashtable_t *ht) return size; } + #ifdef Py_DEBUG void _Py_hashtable_print_stats(_Py_hashtable_t *ht) @@ -243,38 +254,47 @@ _Py_hashtable_print_stats(_Py_hashtable_t *ht) } #endif -/* Get an entry. Return NULL if the key does not exist. */ + _Py_hashtable_entry_t * -_Py_hashtable_get_entry(_Py_hashtable_t *ht, const void *key) +_Py_hashtable_get_entry(_Py_hashtable_t *ht, + size_t key_size, const void *pkey) { Py_uhash_t key_hash; size_t index; _Py_hashtable_entry_t *entry; - key_hash = ht->hash_func(key); + assert(key_size == ht->key_size); + + key_hash = ht->hash_func(key_size, pkey); index = key_hash & (ht->num_buckets - 1); for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) { - if (entry->key_hash == key_hash && ht->compare_func(key, entry)) + if (entry->key_hash == key_hash + && ht->compare_func(key_size, pkey, entry)) break; } return entry; } + static int -_hashtable_pop_entry(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size) +_Py_hashtable_pop_entry(_Py_hashtable_t *ht, size_t key_size, const void *pkey, + void *data, size_t data_size) { Py_uhash_t key_hash; size_t index; _Py_hashtable_entry_t *entry, *previous; - key_hash = ht->hash_func(key); + assert(key_size == ht->key_size); + + key_hash = ht->hash_func(key_size, pkey); index = key_hash & (ht->num_buckets - 1); previous = NULL; for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) { - if (entry->key_hash == key_hash && ht->compare_func(key, entry)) + if (entry->key_hash == key_hash + && ht->compare_func(key_size, pkey, entry)) break; previous = entry; } @@ -287,7 +307,7 @@ _hashtable_pop_entry(_Py_hashtable_t *ht, const void *key, void *data, size_t da ht->entries--; if (data != NULL) - _Py_HASHTABLE_ENTRY_READ_DATA(ht, data, data_size, entry); + _Py_HASHTABLE_ENTRY_READ_DATA(ht, entry, data_size, data); ht->alloc.free(entry); if ((float)ht->entries / (float)ht->num_buckets < HASHTABLE_LOW) @@ -295,26 +315,27 @@ _hashtable_pop_entry(_Py_hashtable_t *ht, const void *key, void *data, size_t da return 1; } -/* Add a new entry to the hash. The key must not be present in the hash table. - Return 0 on success, -1 on memory error. */ + int -_Py_hashtable_set(_Py_hashtable_t *ht, const void *key, - void *data, size_t data_size) +_Py_hashtable_set(_Py_hashtable_t *ht, size_t key_size, const void *pkey, + size_t data_size, void *data) { Py_uhash_t key_hash; size_t index; _Py_hashtable_entry_t *entry; + assert(key_size == ht->key_size); + assert(data != NULL || data_size == 0); #ifndef NDEBUG /* Don't write the assertion on a single line because it is interesting to know the duplicated entry if the assertion failed. The entry can be read using a debugger. */ - entry = _Py_hashtable_get_entry(ht, key); + entry = _Py_hashtable_get_entry(ht, key_size, pkey); assert(entry == NULL); #endif - key_hash = ht->hash_func(key); + key_hash = ht->hash_func(key_size, pkey); index = key_hash & (ht->num_buckets - 1); entry = ht->alloc.malloc(HASHTABLE_ITEM_SIZE(ht)); @@ -323,11 +344,11 @@ _Py_hashtable_set(_Py_hashtable_t *ht, const void *key, return -1; } - entry->key = (void *)key; entry->key_hash = key_hash; + memcpy((void *)_Py_HASHTABLE_ENTRY_KEY(entry), pkey, key_size); assert(data_size == ht->data_size); - memcpy(_Py_HASHTABLE_ENTRY_DATA(entry), data, data_size); + memcpy(_Py_HASHTABLE_ENTRY_DATA(ht, entry), data, data_size); _Py_slist_prepend(&ht->buckets[index], (_Py_slist_item_t*)entry); ht->entries++; @@ -337,48 +358,48 @@ _Py_hashtable_set(_Py_hashtable_t *ht, const void *key, return 0; } -/* Get data from an entry. Copy entry data into data and return 1 if the entry - exists, return 0 if the entry does not exist. */ + int -_Py_hashtable_get(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size) +_Py_hashtable_get(_Py_hashtable_t *ht, size_t key_size,const void *pkey, + size_t data_size, void *data) { _Py_hashtable_entry_t *entry; assert(data != NULL); - entry = _Py_hashtable_get_entry(ht, key); + entry = _Py_hashtable_get_entry(ht, key_size, pkey); if (entry == NULL) return 0; - _Py_HASHTABLE_ENTRY_READ_DATA(ht, data, data_size, entry); + _Py_HASHTABLE_ENTRY_READ_DATA(ht, entry, data_size, data); return 1; } + int -_Py_hashtable_pop(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size) +_Py_hashtable_pop(_Py_hashtable_t *ht, size_t key_size, const void *pkey, + size_t data_size, void *data) { assert(data != NULL); assert(ht->free_data_func == NULL); - return _hashtable_pop_entry(ht, key, data, data_size); + return _Py_hashtable_pop_entry(ht, key_size, pkey, data, data_size); } -/* Delete an entry. The entry must exist. */ + void -_Py_hashtable_delete(_Py_hashtable_t *ht, const void *key) +_Py_hashtable_delete(_Py_hashtable_t *ht, size_t key_size, const void *pkey) { #ifndef NDEBUG - int found = _hashtable_pop_entry(ht, key, NULL, 0); + int found = _Py_hashtable_pop_entry(ht, key_size, pkey, NULL, 0); assert(found); #else - (void)_hashtable_pop_entry(ht, key, NULL, 0); + (void)_Py_hashtable_pop_entry(ht, key_size, pkey, NULL, 0); #endif } -/* Prototype for a pointer to a function to be called foreach - key/value pair in the hash by hashtable_foreach(). Iteration - stops if a non-zero value is returned. */ + int _Py_hashtable_foreach(_Py_hashtable_t *ht, - int (*func) (_Py_hashtable_entry_t *entry, void *arg), + _Py_hashtable_foreach_func func, void *arg) { _Py_hashtable_entry_t *entry; @@ -386,7 +407,7 @@ _Py_hashtable_foreach(_Py_hashtable_t *ht, for (hv = 0; hv < ht->num_buckets; hv++) { for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) { - int res = func(entry, arg); + int res = func(ht, entry, arg); if (res) return res; } @@ -394,9 +415,11 @@ _Py_hashtable_foreach(_Py_hashtable_t *ht, return 0; } + static void hashtable_rehash(_Py_hashtable_t *ht) { + const size_t key_size = ht->key_size; size_t buckets_size, new_size, bucket; _Py_slist_t *old_buckets = NULL; size_t old_num_buckets; @@ -425,7 +448,8 @@ hashtable_rehash(_Py_hashtable_t *ht) for (entry = BUCKETS_HEAD(old_buckets[bucket]); entry != NULL; entry = next) { size_t entry_index; - assert(ht->hash_func(entry->key) == entry->key_hash); + + assert(ht->hash_func(key_size, _Py_HASHTABLE_ENTRY_KEY(entry)) == entry->key_hash); next = ENTRY_NEXT(entry); entry_index = entry->key_hash & (new_size - 1); @@ -436,6 +460,7 @@ hashtable_rehash(_Py_hashtable_t *ht) ht->alloc.free(old_buckets); } + void _Py_hashtable_clear(_Py_hashtable_t *ht) { @@ -446,7 +471,7 @@ _Py_hashtable_clear(_Py_hashtable_t *ht) for (entry = TABLE_HEAD(ht, i); entry != NULL; entry = next) { next = ENTRY_NEXT(entry); if (ht->free_data_func) - ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry)); + ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(ht, entry)); ht->alloc.free(entry); } _Py_slist_init(&ht->buckets[i]); @@ -455,6 +480,7 @@ _Py_hashtable_clear(_Py_hashtable_t *ht) hashtable_rehash(ht); } + void _Py_hashtable_destroy(_Py_hashtable_t *ht) { @@ -465,7 +491,7 @@ _Py_hashtable_destroy(_Py_hashtable_t *ht) while (entry) { _Py_slist_item_t *entry_next = entry->next; if (ht->free_data_func) - ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry)); + ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(ht, entry)); ht->alloc.free(entry); entry = entry_next; } @@ -475,17 +501,20 @@ _Py_hashtable_destroy(_Py_hashtable_t *ht) ht->alloc.free(ht); } -/* Return a copy of the hash table */ + _Py_hashtable_t * _Py_hashtable_copy(_Py_hashtable_t *src) { + const size_t key_size = src->key_size; + const size_t data_size = src->data_size; _Py_hashtable_t *dst; _Py_hashtable_entry_t *entry; size_t bucket; int err; void *data, *new_data; - dst = _Py_hashtable_new_full(src->data_size, src->num_buckets, + dst = _Py_hashtable_new_full(key_size, data_size, + src->num_buckets, src->hash_func, src->compare_func, src->copy_data_func, src->free_data_func, src->get_data_size_func, &src->alloc); @@ -496,17 +525,20 @@ _Py_hashtable_copy(_Py_hashtable_t *src) entry = TABLE_HEAD(src, bucket); for (; entry; entry = ENTRY_NEXT(entry)) { if (src->copy_data_func) { - data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry); + data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(src, entry); new_data = src->copy_data_func(data); if (new_data != NULL) - err = _Py_hashtable_set(dst, entry->key, - &new_data, src->data_size); + err = _Py_hashtable_set(dst, key_size, + _Py_HASHTABLE_ENTRY_KEY(entry), + data_size, &new_data); else err = 1; } else { - data = _Py_HASHTABLE_ENTRY_DATA(entry); - err = _Py_hashtable_set(dst, entry->key, data, src->data_size); + data = _Py_HASHTABLE_ENTRY_DATA(src, entry); + err = _Py_hashtable_set(dst, key_size, + _Py_HASHTABLE_ENTRY_KEY(entry), + data_size, data); } if (err) { _Py_hashtable_destroy(dst); @@ -516,4 +548,3 @@ _Py_hashtable_copy(_Py_hashtable_t *src) } return dst; } - diff --git a/Modules/hashtable.h b/Modules/hashtable.h index a9f9993bfd7..6eb57371b22 100644 --- a/Modules/hashtable.h +++ b/Modules/hashtable.h @@ -1,9 +1,10 @@ #ifndef Py_HASHTABLE_H #define Py_HASHTABLE_H - /* The whole API is private */ #ifndef Py_LIMITED_API +/* Single linked list */ + typedef struct _Py_slist_item_s { struct _Py_slist_item_s *next; } _Py_slist_item_t; @@ -16,30 +17,55 @@ typedef struct { #define _Py_SLIST_HEAD(SLIST) (((_Py_slist_t *)SLIST)->head) + +/* _Py_hashtable: table entry */ + typedef struct { /* used by _Py_hashtable_t.buckets to link entries */ _Py_slist_item_t _Py_slist_item; - const void *key; Py_uhash_t key_hash; - /* data follows */ + /* key (key_size bytes) and then data (data_size bytes) follows */ } _Py_hashtable_entry_t; -#define _Py_HASHTABLE_ENTRY_DATA(ENTRY) \ - ((char *)(ENTRY) + sizeof(_Py_hashtable_entry_t)) +#define _Py_HASHTABLE_ENTRY_KEY(ENTRY) \ + ((const void *)((char *)(ENTRY) + sizeof(_Py_hashtable_entry_t))) -#define _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(ENTRY) \ - (*(void **)_Py_HASHTABLE_ENTRY_DATA(ENTRY)) +#define _Py_HASHTABLE_ENTRY_DATA(TABLE, ENTRY) \ + ((char *)(ENTRY) + sizeof(_Py_hashtable_entry_t) + (TABLE)->key_size) -#define _Py_HASHTABLE_ENTRY_READ_DATA(TABLE, DATA, DATA_SIZE, ENTRY) \ +#define _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(TABLE, ENTRY) \ + (*(void **)_Py_HASHTABLE_ENTRY_DATA(TABLE, ENTRY)) + +/* Get a key value from pkey: use memcpy() rather than a pointer dereference + to avoid memory alignment issues. */ +#define _Py_HASHTABLE_READ_KEY(KEY_SIZE, PKEY, DST_KEY) \ do { \ - assert((DATA_SIZE) == (TABLE)->data_size); \ - memcpy(DATA, _Py_HASHTABLE_ENTRY_DATA(ENTRY), DATA_SIZE); \ + assert(sizeof(DST_KEY) == (KEY_SIZE)); \ + memcpy(&(DST_KEY), (PKEY), sizeof(DST_KEY)); \ } while (0) -typedef Py_uhash_t (*_Py_hashtable_hash_func) (const void *key); -typedef int (*_Py_hashtable_compare_func) (const void *key, const _Py_hashtable_entry_t *he); +#define _Py_HASHTABLE_ENTRY_READ_KEY(KEY_SIZE, ENTRY, KEY) \ + do { \ + assert(sizeof(KEY) == (KEY_SIZE)); \ + memcpy(&(KEY), _Py_HASHTABLE_ENTRY_KEY(ENTRY), sizeof(KEY)); \ + } while (0) + +#define _Py_HASHTABLE_ENTRY_READ_DATA(TABLE, ENTRY, DATA_SIZE, DATA) \ + do { \ + assert((DATA_SIZE) == (TABLE)->data_size); \ + memcpy(DATA, _Py_HASHTABLE_ENTRY_DATA(TABLE, ENTRY), DATA_SIZE); \ + } while (0) + + +/* _Py_hashtable: prototypes */ + +typedef Py_uhash_t (*_Py_hashtable_hash_func) (size_t key_size, + const void *pkey); +typedef int (*_Py_hashtable_compare_func) (size_t key_size, + const void *pkey, + const _Py_hashtable_entry_t *he); typedef void* (*_Py_hashtable_copy_data_func)(void *data); typedef void (*_Py_hashtable_free_data_func)(void *data); typedef size_t (*_Py_hashtable_get_data_size_func)(void *data); @@ -52,10 +78,14 @@ typedef struct { void (*free) (void *ptr); } _Py_hashtable_allocator_t; + +/* _Py_hashtable: table */ + typedef struct { size_t num_buckets; size_t entries; /* Total number of entries in the table. */ _Py_slist_t *buckets; + size_t key_size; size_t data_size; _Py_hashtable_hash_func hash_func; @@ -66,16 +96,25 @@ typedef struct { _Py_hashtable_allocator_t alloc; } _Py_hashtable_t; -/* hash and compare functions for integers and pointers */ -PyAPI_FUNC(Py_uhash_t) _Py_hashtable_hash_ptr(const void *key); -PyAPI_FUNC(Py_uhash_t) _Py_hashtable_hash_int(const void *key); -PyAPI_FUNC(int) _Py_hashtable_compare_direct(const void *key, const _Py_hashtable_entry_t *entry); +/* hash a pointer (void*) */ +PyAPI_FUNC(Py_uhash_t) _Py_hashtable_hash_ptr( + size_t key_size, + const void *pkey); + +/* comparison using memcmp() */ +PyAPI_FUNC(int) _Py_hashtable_compare_direct( + size_t key_size, + const void *pkey, + const _Py_hashtable_entry_t *entry); PyAPI_FUNC(_Py_hashtable_t *) _Py_hashtable_new( + size_t key_size, size_t data_size, _Py_hashtable_hash_func hash_func, _Py_hashtable_compare_func compare_func); + PyAPI_FUNC(_Py_hashtable_t *) _Py_hashtable_new_full( + size_t key_size, size_t data_size, size_t init_size, _Py_hashtable_hash_func hash_func, @@ -84,45 +123,95 @@ PyAPI_FUNC(_Py_hashtable_t *) _Py_hashtable_new_full( _Py_hashtable_free_data_func free_data_func, _Py_hashtable_get_data_size_func get_data_size_func, _Py_hashtable_allocator_t *allocator); -PyAPI_FUNC(_Py_hashtable_t *) _Py_hashtable_copy(_Py_hashtable_t *src); -PyAPI_FUNC(void) _Py_hashtable_clear(_Py_hashtable_t *ht); + PyAPI_FUNC(void) _Py_hashtable_destroy(_Py_hashtable_t *ht); -typedef int (*_Py_hashtable_foreach_func) (_Py_hashtable_entry_t *entry, void *arg); +/* Return a copy of the hash table */ +PyAPI_FUNC(_Py_hashtable_t *) _Py_hashtable_copy(_Py_hashtable_t *src); +PyAPI_FUNC(void) _Py_hashtable_clear(_Py_hashtable_t *ht); + +typedef int (*_Py_hashtable_foreach_func) (_Py_hashtable_t *ht, + _Py_hashtable_entry_t *entry, + void *arg); + +/* Call func() on each entry of the hashtable. + Iteration stops if func() result is non-zero, in this case it's the result + of the call. Otherwise, the function returns 0. */ PyAPI_FUNC(int) _Py_hashtable_foreach( _Py_hashtable_t *ht, - _Py_hashtable_foreach_func func, void *arg); + _Py_hashtable_foreach_func func, + void *arg); + PyAPI_FUNC(size_t) _Py_hashtable_size(_Py_hashtable_t *ht); -PyAPI_FUNC(_Py_hashtable_entry_t*) _Py_hashtable_get_entry( - _Py_hashtable_t *ht, - const void *key); +/* Add a new entry to the hash. The key must not be present in the hash table. + Return 0 on success, -1 on memory error. + + Don't call directly this function, + but use _Py_HASHTABLE_SET() and _Py_HASHTABLE_SET_NODATA() macros */ PyAPI_FUNC(int) _Py_hashtable_set( _Py_hashtable_t *ht, - const void *key, - void *data, - size_t data_size); -PyAPI_FUNC(int) _Py_hashtable_get( - _Py_hashtable_t *ht, - const void *key, - void *data, - size_t data_size); -PyAPI_FUNC(int) _Py_hashtable_pop( - _Py_hashtable_t *ht, - const void *key, - void *data, - size_t data_size); -PyAPI_FUNC(void) _Py_hashtable_delete( - _Py_hashtable_t *ht, - const void *key); + size_t key_size, + const void *pkey, + size_t data_size, + void *data); #define _Py_HASHTABLE_SET(TABLE, KEY, DATA) \ - _Py_hashtable_set(TABLE, KEY, &(DATA), sizeof(DATA)) + _Py_hashtable_set(TABLE, sizeof(KEY), &KEY, sizeof(DATA), &(DATA)) + +#define _Py_HASHTABLE_SET_NODATA(TABLE, KEY) \ + _Py_hashtable_set(TABLE, sizeof(KEY), &KEY, 0, NULL) + + +/* Get an entry. + Return NULL if the key does not exist. + + Don't call directly this function, but use _Py_HASHTABLE_GET_ENTRY() + macro */ +PyAPI_FUNC(_Py_hashtable_entry_t*) _Py_hashtable_get_entry( + _Py_hashtable_t *ht, + size_t key_size, + const void *pkey); + +#define _Py_HASHTABLE_GET_ENTRY(TABLE, KEY) \ + _Py_hashtable_get_entry(TABLE, sizeof(KEY), &(KEY)) + + +/* Get data from an entry. Copy entry data into data and return 1 if the entry + exists, return 0 if the entry does not exist. + + Don't call directly this function, but use _Py_HASHTABLE_GET() macro */ +PyAPI_FUNC(int) _Py_hashtable_get( + _Py_hashtable_t *ht, + size_t key_size, + const void *pkey, + size_t data_size, + void *data); #define _Py_HASHTABLE_GET(TABLE, KEY, DATA) \ - _Py_hashtable_get(TABLE, KEY, &(DATA), sizeof(DATA)) + _Py_hashtable_get(TABLE, sizeof(KEY), &(KEY), sizeof(DATA), &(DATA)) + + +/* Don't call directly this function, but use _Py_HASHTABLE_POP() macro */ +PyAPI_FUNC(int) _Py_hashtable_pop( + _Py_hashtable_t *ht, + size_t key_size, + const void *pkey, + size_t data_size, + void *data); + +#define _Py_HASHTABLE_POP(TABLE, KEY, DATA) \ + _Py_hashtable_pop(TABLE, sizeof(KEY), &(KEY), sizeof(DATA), &(DATA)) + + +/* Delete an entry. + + WARNING: The entry must exist. */ +PyAPI_FUNC(void) _Py_hashtable_delete( + _Py_hashtable_t *ht, + size_t key_size, + const void *pkey); #endif /* Py_LIMITED_API */ - #endif diff --git a/Python/marshal.c b/Python/marshal.c index 7a4b9d29b4e..83a1885181d 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -263,10 +263,10 @@ w_ref(PyObject *v, char *flag, WFILE *p) if (Py_REFCNT(v) == 1) return 0; - entry = _Py_hashtable_get_entry(p->hashtable, v); + entry = _Py_HASHTABLE_GET_ENTRY(p->hashtable, v); if (entry != NULL) { /* write the reference index to the stream */ - _Py_HASHTABLE_ENTRY_READ_DATA(p->hashtable, &w, sizeof(w), entry); + _Py_HASHTABLE_ENTRY_READ_DATA(p->hashtable, entry, sizeof(w), &w); /* we don't store "long" indices in the dict */ assert(0 <= w && w <= 0x7fffffff); w_byte(TYPE_REF, p); @@ -571,7 +571,8 @@ static int w_init_refs(WFILE *wf, int version) { if (version >= 3) { - wf->hashtable = _Py_hashtable_new(sizeof(int), _Py_hashtable_hash_ptr, + wf->hashtable = _Py_hashtable_new(sizeof(PyObject *), sizeof(int), + _Py_hashtable_hash_ptr, _Py_hashtable_compare_direct); if (wf->hashtable == NULL) { PyErr_NoMemory(); @@ -582,9 +583,13 @@ w_init_refs(WFILE *wf, int version) } static int -w_decref_entry(_Py_hashtable_entry_t *entry, void *Py_UNUSED(data)) +w_decref_entry(_Py_hashtable_t *ht, _Py_hashtable_entry_t *entry, + void *Py_UNUSED(data)) { - Py_XDECREF(entry->key); + PyObject *entry_key; + + _Py_HASHTABLE_ENTRY_READ_KEY(ht->key_size, entry, entry_key); + Py_XDECREF(entry_key); return 0; }