From 51b846c47a9b1db927939ccfb037a5a0ff6ff99c Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 18 Mar 2016 21:52:22 +0100 Subject: [PATCH] _tracemalloc: add domain to trace keys * hashtable.h: key has now a variable size * _tracemalloc uses (pointer: void*, domain: unsigned int) as key for traces --- Modules/_tracemalloc.c | 188 ++++++++++++++++++++++++++++++----------- Modules/hashtable.c | 105 +++++++++++++---------- Modules/hashtable.h | 89 +++++++++++++------ Python/marshal.c | 11 ++- 4 files changed, 271 insertions(+), 122 deletions(-) diff --git a/Modules/_tracemalloc.c b/Modules/_tracemalloc.c index 5752904d477..73aa53b76d9 100644 --- a/Modules/_tracemalloc.c +++ b/Modules/_tracemalloc.c @@ -3,6 +3,7 @@ #include "frameobject.h" #include "pythread.h" #include "osdefs.h" +#include <stddef.h> /* For offsetof */ /* Trace memory blocks allocated by PyMem_RawMalloc() */ #define TRACE_RAW_MALLOC @@ -54,6 +55,26 @@ static PyThread_type_lock tables_lock; # define TABLES_UNLOCK() #endif +typedef unsigned int domain_t; + +/* FIXME: pack also? */ +typedef struct { + void *ptr; + domain_t domain; +} pointer_t; + +/* Size of pointer_t content, it can be smaller than sizeof(pointer_t) */ +#define POINTER_T_SIZE \ + (offsetof(pointer_t, domain) + sizeof(domain_t)) + +#define POINTER_T_FILL_PADDING(key) \ + do { \ + if (POINTER_T_SIZE != sizeof(pointer_t)) { \ + memset((char *)&(key) + POINTER_T_SIZE, 0, \ + sizeof(pointer_t) - POINTER_T_SIZE); \ + } \ + } while (0) + /* Pack the frame_t structure to reduce the memory footprint on 64-bit architectures: 12 bytes instead of 16. 
This optimization might produce SIGBUS on architectures not supporting unaligned memory accesses (64-bit @@ -196,23 +217,56 @@ set_reentrant(int reentrant) } #endif -static int -hashtable_compare_unicode(const void *key, const _Py_hashtable_entry_t *entry) +static Py_uhash_t +hashtable_hash_pointer(size_t key_size, const void *pkey) { - if (key != NULL && entry->key != NULL) - return (PyUnicode_Compare((PyObject *)key, (PyObject *)entry->key) == 0); + pointer_t ptr; + Py_uhash_t hash; + + assert(sizeof(ptr) == key_size); + ptr = *(pointer_t *)pkey; + + hash = (Py_uhash_t)_Py_HashPointer(ptr.ptr); + hash ^= ptr.domain; + return hash; +} + +static Py_uhash_t +hashtable_hash_pyobject(size_t key_size, const void *pkey) +{ + PyObject *obj; + + assert(key_size == sizeof(PyObject *)); + obj = *(PyObject **)pkey; + + return PyObject_Hash(obj); +} + +static int +hashtable_compare_unicode(size_t key_size, const void *pkey, + const _Py_hashtable_entry_t *entry) +{ + PyObject *key, *entry_key; + + assert(sizeof(key) == key_size); + key = *(PyObject **)pkey; + assert(sizeof(entry_key) == key_size); + entry_key = *(PyObject **)_Py_HASHTABLE_ENTRY_KEY(entry); + + if (key != NULL && entry_key != NULL) + return (PyUnicode_Compare(key, entry_key) == 0); else - return key == entry->key; + return key == entry_key; } static _Py_hashtable_allocator_t hashtable_alloc = {malloc, free}; static _Py_hashtable_t * -hashtable_new(size_t data_size, +hashtable_new(size_t key_size, size_t data_size, _Py_hashtable_hash_func hash_func, _Py_hashtable_compare_func compare_func) { - return _Py_hashtable_new_full(data_size, 0, + return _Py_hashtable_new_full(key_size, data_size, 0, hash_func, compare_func, NULL, NULL, NULL, &hashtable_alloc); } @@ -230,20 +284,26 @@ raw_free(void *ptr) } static Py_uhash_t -hashtable_hash_traceback(const void *key) +hashtable_hash_traceback(size_t key_size, const void *pkey) { - const traceback_t *traceback = key; + const traceback_t *traceback = *(const traceback_t 
**)pkey; + assert(key_size == sizeof(const traceback_t *)); return traceback->hash; } static int -hashtable_compare_traceback(const traceback_t *traceback1, +hashtable_compare_traceback(size_t key_size, const void *pkey, const _Py_hashtable_entry_t *he) { - const traceback_t *traceback2 = he->key; + traceback_t *traceback1, *traceback2; const frame_t *frame1, *frame2; int i; + assert(sizeof(traceback1) == key_size); + assert(sizeof(traceback2) == key_size); + traceback1 = *(traceback_t **)pkey; + traceback2 = *(traceback_t **)_Py_HASHTABLE_ENTRY_KEY(he); + if (traceback1->nframe != traceback2->nframe) return 0; @@ -312,15 +372,16 @@ tracemalloc_get_frame(PyFrameObject *pyframe, frame_t *frame) } /* intern the filename */ - entry = _Py_hashtable_get_entry(tracemalloc_filenames, filename); + entry = _Py_HASHTABLE_GET_ENTRY(tracemalloc_filenames, filename); if (entry != NULL) { - filename = (PyObject *)entry->key; + assert(sizeof(filename) == tracemalloc_filenames->key_size); + filename = *(PyObject **)_Py_HASHTABLE_ENTRY_KEY(entry); } else { /* tracemalloc_filenames is responsible to keep a reference to the filename */ Py_INCREF(filename); - if (_Py_hashtable_set(tracemalloc_filenames, filename, NULL, 0) < 0) { + if (_Py_HASHTABLE_SET_NODATA(tracemalloc_filenames, filename) < 0) { Py_DECREF(filename); #ifdef TRACE_DEBUG tracemalloc_error("failed to intern the filename"); @@ -403,9 +464,10 @@ traceback_new(void) traceback->hash = traceback_hash(traceback); /* intern the traceback */ - entry = _Py_hashtable_get_entry(tracemalloc_tracebacks, traceback); + entry = _Py_HASHTABLE_GET_ENTRY(tracemalloc_tracebacks, traceback); if (entry != NULL) { - traceback = (traceback_t *)entry->key; + assert(sizeof(traceback) == tracemalloc_tracebacks->key_size); + traceback = *(traceback_t **)_Py_HASHTABLE_ENTRY_KEY(entry); } else { traceback_t *copy; @@ -422,7 +484,7 @@ traceback_new(void) } memcpy(copy, traceback, traceback_size); - if (_Py_hashtable_set(tracemalloc_tracebacks, copy, 
NULL, 0) < 0) { + if (_Py_HASHTABLE_SET_NODATA(tracemalloc_tracebacks, copy) < 0) { raw_free(copy); #ifdef TRACE_DEBUG tracemalloc_error("failed to intern the traceback: putdata failed"); @@ -435,8 +497,9 @@ traceback_new(void) } static int -tracemalloc_add_trace(void *ptr, size_t size) +tracemalloc_add_trace(void *ptr, domain_t domain, size_t size) { + pointer_t key; traceback_t *traceback; trace_t trace; int res; @@ -445,10 +508,14 @@ tracemalloc_add_trace(void *ptr, size_t size) if (traceback == NULL) return -1; + key.ptr = ptr; + key.domain = domain; + POINTER_T_FILL_PADDING(key); + trace.size = size; trace.traceback = traceback; - res = _Py_HASHTABLE_SET(tracemalloc_traces, ptr, trace); + res = _Py_HASHTABLE_SET(tracemalloc_traces, key, trace); if (res == 0) { assert(tracemalloc_traced_memory <= PY_SIZE_MAX - size); tracemalloc_traced_memory += size; @@ -460,11 +527,16 @@ tracemalloc_add_trace(void *ptr, size_t size) } static void -tracemalloc_remove_trace(void *ptr) +tracemalloc_remove_trace(void *ptr, domain_t domain) { + pointer_t key; trace_t trace; - if (_Py_hashtable_pop(tracemalloc_traces, ptr, &trace, sizeof(trace))) { + key.ptr = ptr; + key.domain = domain; + POINTER_T_FILL_PADDING(key); + + if (_Py_HASHTABLE_POP(tracemalloc_traces, key, trace)) { assert(tracemalloc_traced_memory >= trace.size); tracemalloc_traced_memory -= trace.size; } @@ -486,7 +558,7 @@ tracemalloc_alloc(int use_calloc, void *ctx, size_t nelem, size_t elsize) return NULL; TABLES_LOCK(); - if (tracemalloc_add_trace(ptr, nelem * elsize) < 0) { + if (tracemalloc_add_trace(ptr, 0, nelem * elsize) < 0) { /* Failed to allocate a trace for the new memory block */ TABLES_UNLOCK(); alloc->free(alloc->ctx, ptr); @@ -510,9 +582,9 @@ tracemalloc_realloc(void *ctx, void *ptr, size_t new_size) /* an existing memory block has been resized */ TABLES_LOCK(); - tracemalloc_remove_trace(ptr); + tracemalloc_remove_trace(ptr, 0); - if (tracemalloc_add_trace(ptr2, new_size) < 0) { + if 
(tracemalloc_add_trace(ptr2, 0, new_size) < 0) { /* Memory allocation failed. The error cannot be reported to the caller, because realloc() may already have shrinked the memory block and so removed bytes. @@ -530,7 +602,7 @@ tracemalloc_realloc(void *ctx, void *ptr, size_t new_size) /* new allocation */ TABLES_LOCK(); - if (tracemalloc_add_trace(ptr2, new_size) < 0) { + if (tracemalloc_add_trace(ptr2, 0, new_size) < 0) { /* Failed to allocate a trace for the new memory block */ TABLES_UNLOCK(); alloc->free(alloc->ctx, ptr2); @@ -555,7 +627,7 @@ tracemalloc_free(void *ctx, void *ptr) alloc->free(alloc->ctx, ptr); TABLES_LOCK(); - tracemalloc_remove_trace(ptr); + tracemalloc_remove_trace(ptr, 0); TABLES_UNLOCK(); } @@ -610,7 +682,7 @@ tracemalloc_realloc_gil(void *ctx, void *ptr, size_t new_size) ptr2 = alloc->realloc(alloc->ctx, ptr, new_size); if (ptr2 != NULL && ptr != NULL) { TABLES_LOCK(); - tracemalloc_remove_trace(ptr); + tracemalloc_remove_trace(ptr, 0); TABLES_UNLOCK(); } return ptr2; @@ -689,7 +761,7 @@ tracemalloc_raw_realloc(void *ctx, void *ptr, size_t new_size) if (ptr2 != NULL && ptr != NULL) { TABLES_LOCK(); - tracemalloc_remove_trace(ptr); + tracemalloc_remove_trace(ptr, 0); TABLES_UNLOCK(); } return ptr2; @@ -714,17 +786,27 @@ tracemalloc_raw_realloc(void *ctx, void *ptr, size_t new_size) #endif /* TRACE_RAW_MALLOC */ static int -tracemalloc_clear_filename(_Py_hashtable_entry_t *entry, void *user_data) +tracemalloc_clear_filename(_Py_hashtable_t *ht, _Py_hashtable_entry_t *entry, + void *user_data) { - PyObject *filename = (PyObject *)entry->key; + PyObject *filename; + + assert(sizeof(filename) == ht->key_size); + filename = *(PyObject **)_Py_HASHTABLE_ENTRY_KEY(entry); + Py_DECREF(filename); return 0; } static int -traceback_free_traceback(_Py_hashtable_entry_t *entry, void *user_data) +traceback_free_traceback(_Py_hashtable_t *ht, _Py_hashtable_entry_t *entry, + void *user_data) { - traceback_t *traceback = (traceback_t *)entry->key; + 
traceback_t *traceback; + + assert(sizeof(traceback) == ht->key_size); + traceback = *(traceback_t **)_Py_HASHTABLE_ENTRY_KEY(entry); + raw_free(traceback); return 0; } @@ -791,16 +873,16 @@ tracemalloc_init(void) } #endif - tracemalloc_filenames = hashtable_new(0, - (_Py_hashtable_hash_func)PyObject_Hash, + tracemalloc_filenames = hashtable_new(sizeof(PyObject *), 0, + hashtable_hash_pyobject, hashtable_compare_unicode); - tracemalloc_tracebacks = hashtable_new(0, - (_Py_hashtable_hash_func)hashtable_hash_traceback, - (_Py_hashtable_compare_func)hashtable_compare_traceback); + tracemalloc_tracebacks = hashtable_new(sizeof(traceback_t *), 0, + hashtable_hash_traceback, + hashtable_compare_traceback); - tracemalloc_traces = hashtable_new(sizeof(trace_t), - _Py_hashtable_hash_ptr, + tracemalloc_traces = hashtable_new(sizeof(pointer_t), sizeof(trace_t), + hashtable_hash_pointer, _Py_hashtable_compare_direct); if (tracemalloc_filenames == NULL || tracemalloc_tracebacks == NULL @@ -1065,14 +1147,15 @@ typedef struct { } get_traces_t; static int -tracemalloc_get_traces_fill(_Py_hashtable_entry_t *entry, void *user_data) +tracemalloc_get_traces_fill(_Py_hashtable_t *traces, _Py_hashtable_entry_t *entry, + void *user_data) { get_traces_t *get_traces = user_data; trace_t *trace; PyObject *tracemalloc_obj; int res; - trace = (trace_t *)_Py_HASHTABLE_ENTRY_DATA(entry); + trace = (trace_t *)_Py_HASHTABLE_ENTRY_DATA(traces, entry); tracemalloc_obj = trace_to_pyobject(trace, get_traces->tracebacks); if (tracemalloc_obj == NULL) @@ -1087,9 +1170,11 @@ tracemalloc_get_traces_fill(_Py_hashtable_entry_t *entry, void *user_data) } static int -tracemalloc_pyobject_decref_cb(_Py_hashtable_entry_t *entry, void *user_data) +tracemalloc_pyobject_decref_cb(_Py_hashtable_t *tracebacks, + _Py_hashtable_entry_t *entry, + void *user_data) { - PyObject *obj = (PyObject *)_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry); + PyObject *obj = (PyObject *)_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(tracebacks, 
entry); Py_DECREF(obj); return 0; } @@ -1120,7 +1205,7 @@ py_tracemalloc_get_traces(PyObject *self, PyObject *obj) /* the traceback hash table is used temporarily to intern traceback tuple of (filename, lineno) tuples */ - get_traces.tracebacks = hashtable_new(sizeof(PyObject *), + get_traces.tracebacks = hashtable_new(sizeof(traceback_t *), sizeof(PyObject *), _Py_hashtable_hash_ptr, _Py_hashtable_compare_direct); if (get_traces.tracebacks == NULL) { @@ -1152,7 +1237,7 @@ error: finally: if (get_traces.tracebacks != NULL) { _Py_hashtable_foreach(get_traces.tracebacks, - tracemalloc_pyobject_decref_cb, NULL); + tracemalloc_pyobject_decref_cb, NULL); _Py_hashtable_destroy(get_traces.tracebacks); } if (get_traces.traces != NULL) @@ -1162,16 +1247,21 @@ finally: } static traceback_t* -tracemalloc_get_traceback(const void *ptr) +tracemalloc_get_traceback(const void *ptr, domain_t domain) { + pointer_t key; trace_t trace; int found; if (!tracemalloc_config.tracing) return NULL; + key.ptr = (void *)ptr; + key.domain = domain; + POINTER_T_FILL_PADDING(key); + TABLES_LOCK(); - found = _Py_HASHTABLE_GET(tracemalloc_traces, ptr, trace); + found = _Py_HASHTABLE_GET(tracemalloc_traces, key, trace); TABLES_UNLOCK(); if (!found) @@ -1202,7 +1292,7 @@ py_tracemalloc_get_object_traceback(PyObject *self, PyObject *obj) else ptr = (void *)obj; - traceback = tracemalloc_get_traceback(ptr); + traceback = tracemalloc_get_traceback(ptr, 0); if (traceback == NULL) Py_RETURN_NONE; @@ -1229,7 +1319,7 @@ _PyMem_DumpTraceback(int fd, const void *ptr) traceback_t *traceback; int i; - traceback = tracemalloc_get_traceback(ptr); + traceback = tracemalloc_get_traceback(ptr, 0); if (traceback == NULL) return; diff --git a/Modules/hashtable.c b/Modules/hashtable.c index 7de154b70a7..002c0a93420 100644 --- a/Modules/hashtable.c +++ b/Modules/hashtable.c @@ -59,7 +59,7 @@ #define ENTRY_NEXT(ENTRY) \ ((_Py_hashtable_entry_t *)_Py_SLIST_ITEM_NEXT(ENTRY)) #define HASHTABLE_ITEM_SIZE(HT) \ - 
(sizeof(_Py_hashtable_entry_t) + (HT)->data_size) + (sizeof(_Py_hashtable_entry_t) + (HT)->key_size + (HT)->data_size) /* Forward declaration */ static void hashtable_rehash(_Py_hashtable_t *ht); @@ -88,21 +88,21 @@ _Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous, } Py_uhash_t -_Py_hashtable_hash_int(const void *key) +_Py_hashtable_hash_ptr(size_t key_size, const void *pkey) { - return (Py_uhash_t)key; -} + void *key; + + assert(key_size == sizeof(void *)); + key = *(void**)pkey; -Py_uhash_t -_Py_hashtable_hash_ptr(const void *key) -{ return (Py_uhash_t)_Py_HashPointer((void *)key); } int -_Py_hashtable_compare_direct(const void *key, const _Py_hashtable_entry_t *entry) +_Py_hashtable_compare_direct(size_t key_size, const void *pkey, + const _Py_hashtable_entry_t *entry) { - return entry->key == key; + return (memcmp(pkey, _Py_HASHTABLE_ENTRY_KEY(entry), key_size) == 0); } /* makes sure the real size of the buckets array is a power of 2 */ @@ -119,7 +119,8 @@ round_size(size_t s) } _Py_hashtable_t * -_Py_hashtable_new_full(size_t data_size, size_t init_size, +_Py_hashtable_new_full(size_t key_size, size_t data_size, + size_t init_size, _Py_hashtable_hash_func hash_func, _Py_hashtable_compare_func compare_func, _Py_hashtable_copy_data_func copy_data_func, @@ -144,6 +145,7 @@ _Py_hashtable_new_full(size_t data_size, size_t init_size, ht->num_buckets = round_size(init_size); ht->entries = 0; + ht->key_size = key_size; ht->data_size = data_size; buckets_size = ht->num_buckets * sizeof(ht->buckets[0]); @@ -164,11 +166,12 @@ _Py_hashtable_new_full(size_t data_size, size_t init_size, } _Py_hashtable_t * -_Py_hashtable_new(size_t data_size, +_Py_hashtable_new(size_t key_size, size_t data_size, _Py_hashtable_hash_func hash_func, _Py_hashtable_compare_func compare_func) { - return _Py_hashtable_new_full(data_size, HASHTABLE_MIN_SIZE, + return _Py_hashtable_new_full(key_size, data_size, + HASHTABLE_MIN_SIZE, hash_func, compare_func, NULL, NULL, NULL, NULL); } 
@@ -195,7 +198,7 @@ _Py_hashtable_size(_Py_hashtable_t *ht) for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) { void *data; - data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry); + data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(ht, entry); size += ht->get_data_size_func(data); } } @@ -245,17 +248,21 @@ _Py_hashtable_print_stats(_Py_hashtable_t *ht) /* Get an entry. Return NULL if the key does not exist. */ _Py_hashtable_entry_t * -_Py_hashtable_get_entry(_Py_hashtable_t *ht, const void *key) +_Py_hashtable_get_entry(_Py_hashtable_t *ht, + size_t key_size, const void *pkey) { Py_uhash_t key_hash; size_t index; _Py_hashtable_entry_t *entry; - key_hash = ht->hash_func(key); + assert(key_size == ht->key_size); + + key_hash = ht->hash_func(key_size, pkey); index = key_hash & (ht->num_buckets - 1); for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) { - if (entry->key_hash == key_hash && ht->compare_func(key, entry)) + if (entry->key_hash == key_hash + && ht->compare_func(key_size, pkey, entry)) break; } @@ -263,18 +270,20 @@ _Py_hashtable_get_entry(_Py_hashtable_t *ht, const void *key) } static int -_hashtable_pop_entry(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size) +_hashtable_pop_entry(_Py_hashtable_t *ht, size_t key_size, const void *pkey, + void *data, size_t data_size) { Py_uhash_t key_hash; size_t index; _Py_hashtable_entry_t *entry, *previous; - key_hash = ht->hash_func(key); + key_hash = ht->hash_func(key_size, pkey); index = key_hash & (ht->num_buckets - 1); previous = NULL; for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) { - if (entry->key_hash == key_hash && ht->compare_func(key, entry)) + if (entry->key_hash == key_hash + && ht->compare_func(key_size, pkey, entry)) break; previous = entry; } @@ -298,8 +307,8 @@ _hashtable_pop_entry(_Py_hashtable_t *ht, const void *key, void *data, size_t da /* Add a new entry to the hash. The key must not be present in the hash table. 
Return 0 on success, -1 on memory error. */ int -_Py_hashtable_set(_Py_hashtable_t *ht, const void *key, - void *data, size_t data_size) +_Py_hashtable_set(_Py_hashtable_t *ht, size_t key_size, const void *pkey, + size_t data_size, void *data) { Py_uhash_t key_hash; size_t index; @@ -310,11 +319,11 @@ _Py_hashtable_set(_Py_hashtable_t *ht, const void *key, /* Don't write the assertion on a single line because it is interesting to know the duplicated entry if the assertion failed. The entry can be read using a debugger. */ - entry = _Py_hashtable_get_entry(ht, key); + entry = _Py_hashtable_get_entry(ht, key_size, pkey); assert(entry == NULL); #endif - key_hash = ht->hash_func(key); + key_hash = ht->hash_func(key_size, pkey); index = key_hash & (ht->num_buckets - 1); entry = ht->alloc.malloc(HASHTABLE_ITEM_SIZE(ht)); @@ -323,11 +332,11 @@ _Py_hashtable_set(_Py_hashtable_t *ht, const void *key, return -1; } - entry->key = (void *)key; entry->key_hash = key_hash; + memcpy(_Py_HASHTABLE_ENTRY_KEY(entry), pkey, key_size); assert(data_size == ht->data_size); - memcpy(_Py_HASHTABLE_ENTRY_DATA(entry), data, data_size); + memcpy(_Py_HASHTABLE_ENTRY_DATA(ht, entry), data, data_size); _Py_slist_prepend(&ht->buckets[index], (_Py_slist_item_t*)entry); ht->entries++; @@ -340,13 +349,14 @@ _Py_hashtable_set(_Py_hashtable_t *ht, const void *key, /* Get data from an entry. Copy entry data into data and return 1 if the entry exists, return 0 if the entry does not exist. 
*/ int -_Py_hashtable_get(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size) +_Py_hashtable_get(_Py_hashtable_t *ht, size_t key_size,const void *pkey, + size_t data_size, void *data) { _Py_hashtable_entry_t *entry; assert(data != NULL); - entry = _Py_hashtable_get_entry(ht, key); + entry = _Py_hashtable_get_entry(ht, key_size, pkey); if (entry == NULL) return 0; _Py_HASHTABLE_ENTRY_READ_DATA(ht, data, data_size, entry); @@ -354,22 +364,23 @@ _Py_hashtable_get(_Py_hashtable_t *ht, const void *key, void *data, size_t data_ } int -_Py_hashtable_pop(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size) +_Py_hashtable_pop(_Py_hashtable_t *ht, size_t key_size, const void *pkey, + size_t data_size, void *data) { assert(data != NULL); assert(ht->free_data_func == NULL); - return _hashtable_pop_entry(ht, key, data, data_size); + return _hashtable_pop_entry(ht, key_size, pkey, data, data_size); } /* Delete an entry. The entry must exist. */ void -_Py_hashtable_delete(_Py_hashtable_t *ht, const void *key) +_Py_hashtable_delete(_Py_hashtable_t *ht, size_t key_size, const void *pkey) { #ifndef NDEBUG - int found = _hashtable_pop_entry(ht, key, NULL, 0); + int found = _hashtable_pop_entry(ht, key_size, pkey, NULL, 0); assert(found); #else - (void)_hashtable_pop_entry(ht, key, NULL, 0); + (void)_hashtable_pop_entry(ht, key_size, pkey, NULL, 0); #endif } @@ -378,7 +389,7 @@ _Py_hashtable_delete(_Py_hashtable_t *ht, const void *key) stops if a non-zero value is returned. 
*/ int _Py_hashtable_foreach(_Py_hashtable_t *ht, - int (*func) (_Py_hashtable_entry_t *entry, void *arg), + _Py_hashtable_foreach_func func, void *arg) { _Py_hashtable_entry_t *entry; @@ -386,7 +397,7 @@ _Py_hashtable_foreach(_Py_hashtable_t *ht, for (hv = 0; hv < ht->num_buckets; hv++) { for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) { - int res = func(entry, arg); + int res = func(ht, entry, arg); if (res) return res; } @@ -397,6 +408,7 @@ _Py_hashtable_foreach(_Py_hashtable_t *ht, static void hashtable_rehash(_Py_hashtable_t *ht) { + const size_t key_size = ht->key_size; size_t buckets_size, new_size, bucket; _Py_slist_t *old_buckets = NULL; size_t old_num_buckets; @@ -425,7 +437,8 @@ hashtable_rehash(_Py_hashtable_t *ht) for (entry = BUCKETS_HEAD(old_buckets[bucket]); entry != NULL; entry = next) { size_t entry_index; - assert(ht->hash_func(entry->key) == entry->key_hash); + + assert(ht->hash_func(key_size, _Py_HASHTABLE_ENTRY_KEY(entry)) == entry->key_hash); next = ENTRY_NEXT(entry); entry_index = entry->key_hash & (new_size - 1); @@ -446,7 +459,7 @@ _Py_hashtable_clear(_Py_hashtable_t *ht) for (entry = TABLE_HEAD(ht, i); entry != NULL; entry = next) { next = ENTRY_NEXT(entry); if (ht->free_data_func) - ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry)); + ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(ht, entry)); ht->alloc.free(entry); } _Py_slist_init(&ht->buckets[i]); @@ -465,7 +478,7 @@ _Py_hashtable_destroy(_Py_hashtable_t *ht) while (entry) { _Py_slist_item_t *entry_next = entry->next; if (ht->free_data_func) - ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry)); + ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(ht, entry)); ht->alloc.free(entry); entry = entry_next; } @@ -479,13 +492,16 @@ _Py_hashtable_destroy(_Py_hashtable_t *ht) _Py_hashtable_t * _Py_hashtable_copy(_Py_hashtable_t *src) { + const size_t key_size = src->key_size; + const size_t data_size = src->data_size; _Py_hashtable_t *dst; 
_Py_hashtable_entry_t *entry; size_t bucket; int err; void *data, *new_data; - dst = _Py_hashtable_new_full(src->data_size, src->num_buckets, + dst = _Py_hashtable_new_full(key_size, data_size, + src->num_buckets, src->hash_func, src->compare_func, src->copy_data_func, src->free_data_func, src->get_data_size_func, &src->alloc); @@ -496,17 +512,20 @@ _Py_hashtable_copy(_Py_hashtable_t *src) entry = TABLE_HEAD(src, bucket); for (; entry; entry = ENTRY_NEXT(entry)) { if (src->copy_data_func) { - data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry); + data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(src, entry); new_data = src->copy_data_func(data); if (new_data != NULL) - err = _Py_hashtable_set(dst, entry->key, - &new_data, src->data_size); + err = _Py_hashtable_set(dst, key_size, + _Py_HASHTABLE_ENTRY_KEY(entry), + data_size, &new_data); else err = 1; } else { - data = _Py_HASHTABLE_ENTRY_DATA(entry); - err = _Py_hashtable_set(dst, entry->key, data, src->data_size); + data = _Py_HASHTABLE_ENTRY_DATA(src, entry); + err = _Py_hashtable_set(dst, key_size, + _Py_HASHTABLE_ENTRY_KEY(entry), + data_size, data); } if (err) { _Py_hashtable_destroy(dst); diff --git a/Modules/hashtable.h b/Modules/hashtable.h index a9f9993bfd7..aed3ed07d9d 100644 --- a/Modules/hashtable.h +++ b/Modules/hashtable.h @@ -20,26 +20,31 @@ typedef struct { /* used by _Py_hashtable_t.buckets to link entries */ _Py_slist_item_t _Py_slist_item; - const void *key; Py_uhash_t key_hash; /* data follows */ } _Py_hashtable_entry_t; -#define _Py_HASHTABLE_ENTRY_DATA(ENTRY) \ - ((char *)(ENTRY) + sizeof(_Py_hashtable_entry_t)) +#define _Py_HASHTABLE_ENTRY_KEY(ENTRY) \ + ((void *)((char *)(ENTRY) + sizeof(_Py_hashtable_entry_t))) -#define _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(ENTRY) \ - (*(void **)_Py_HASHTABLE_ENTRY_DATA(ENTRY)) +#define _Py_HASHTABLE_ENTRY_DATA(TABLE, ENTRY) \ + ((char *)(ENTRY) + sizeof(_Py_hashtable_entry_t) + (TABLE)->key_size) + +#define _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(TABLE, ENTRY) \ + (*(void 
**)_Py_HASHTABLE_ENTRY_DATA(TABLE, ENTRY)) #define _Py_HASHTABLE_ENTRY_READ_DATA(TABLE, DATA, DATA_SIZE, ENTRY) \ do { \ assert((DATA_SIZE) == (TABLE)->data_size); \ - memcpy(DATA, _Py_HASHTABLE_ENTRY_DATA(ENTRY), DATA_SIZE); \ + memcpy(DATA, _Py_HASHTABLE_ENTRY_DATA(TABLE, ENTRY), DATA_SIZE); \ } while (0) -typedef Py_uhash_t (*_Py_hashtable_hash_func) (const void *key); -typedef int (*_Py_hashtable_compare_func) (const void *key, const _Py_hashtable_entry_t *he); +typedef Py_uhash_t (*_Py_hashtable_hash_func) (size_t key_size, + const void *pkey); +typedef int (*_Py_hashtable_compare_func) (size_t key_size, + const void *pkey, + const _Py_hashtable_entry_t *he); typedef void* (*_Py_hashtable_copy_data_func)(void *data); typedef void (*_Py_hashtable_free_data_func)(void *data); typedef size_t (*_Py_hashtable_get_data_size_func)(void *data); @@ -56,6 +61,7 @@ typedef struct { size_t num_buckets; size_t entries; /* Total number of entries in the table. */ _Py_slist_t *buckets; + size_t key_size; size_t data_size; _Py_hashtable_hash_func hash_func; @@ -67,15 +73,21 @@ typedef struct { } _Py_hashtable_t; /* hash and compare functions for integers and pointers */ -PyAPI_FUNC(Py_uhash_t) _Py_hashtable_hash_ptr(const void *key); -PyAPI_FUNC(Py_uhash_t) _Py_hashtable_hash_int(const void *key); -PyAPI_FUNC(int) _Py_hashtable_compare_direct(const void *key, const _Py_hashtable_entry_t *entry); +PyAPI_FUNC(Py_uhash_t) _Py_hashtable_hash_ptr( + size_t key_size, + const void *pkey); +PyAPI_FUNC(int) _Py_hashtable_compare_direct( + size_t key_size, + const void *pkey, + const _Py_hashtable_entry_t *entry); PyAPI_FUNC(_Py_hashtable_t *) _Py_hashtable_new( + size_t key_size, size_t data_size, _Py_hashtable_hash_func hash_func, _Py_hashtable_compare_func compare_func); PyAPI_FUNC(_Py_hashtable_t *) _Py_hashtable_new_full( + size_t key_size, size_t data_size, size_t init_size, _Py_hashtable_hash_func hash_func, @@ -88,40 +100,65 @@ PyAPI_FUNC(_Py_hashtable_t *) 
_Py_hashtable_copy(_Py_hashtable_t *src); PyAPI_FUNC(void) _Py_hashtable_clear(_Py_hashtable_t *ht); PyAPI_FUNC(void) _Py_hashtable_destroy(_Py_hashtable_t *ht); -typedef int (*_Py_hashtable_foreach_func) (_Py_hashtable_entry_t *entry, void *arg); +typedef int (*_Py_hashtable_foreach_func) (_Py_hashtable_t *ht, + _Py_hashtable_entry_t *entry, + void *arg); PyAPI_FUNC(int) _Py_hashtable_foreach( _Py_hashtable_t *ht, - _Py_hashtable_foreach_func func, void *arg); + _Py_hashtable_foreach_func func, + void *arg); PyAPI_FUNC(size_t) _Py_hashtable_size(_Py_hashtable_t *ht); PyAPI_FUNC(_Py_hashtable_entry_t*) _Py_hashtable_get_entry( _Py_hashtable_t *ht, - const void *key); + size_t key_size, + const void *pkey); + +/* Do not call this function directly: + use the _Py_HASHTABLE_SET() and _Py_HASHTABLE_SET_NODATA() macros */ PyAPI_FUNC(int) _Py_hashtable_set( _Py_hashtable_t *ht, - const void *key, - void *data, - size_t data_size); + size_t key_size, + const void *pkey, + size_t data_size, + void *data); + +/* Do not call this function directly: use the _Py_HASHTABLE_GET() macro */ PyAPI_FUNC(int) _Py_hashtable_get( _Py_hashtable_t *ht, - const void *key, - void *data, - size_t data_size); + size_t key_size, + const void *pkey, + size_t data_size, + void *data); + +/* Do not call this function directly: use the _Py_HASHTABLE_POP() macro */ PyAPI_FUNC(int) _Py_hashtable_pop( _Py_hashtable_t *ht, - const void *key, - void *data, - size_t data_size); + size_t key_size, + const void *pkey, + size_t data_size, + void *data); + PyAPI_FUNC(void) _Py_hashtable_delete( _Py_hashtable_t *ht, - const void *key); + size_t key_size, + const void *pkey); #define _Py_HASHTABLE_SET(TABLE, KEY, DATA) \ - _Py_hashtable_set(TABLE, KEY, &(DATA), sizeof(DATA)) + _Py_hashtable_set(TABLE, sizeof(KEY), &(KEY), sizeof(DATA), &(DATA)) + +#define _Py_HASHTABLE_SET_NODATA(TABLE, KEY) \ + _Py_hashtable_set(TABLE, sizeof(KEY), &(KEY), 0, NULL) + +#define _Py_HASHTABLE_GET_ENTRY(TABLE, KEY) \ + 
_Py_hashtable_get_entry(TABLE, sizeof(KEY), &(KEY)) #define _Py_HASHTABLE_GET(TABLE, KEY, DATA) \ - _Py_hashtable_get(TABLE, KEY, &(DATA), sizeof(DATA)) + _Py_hashtable_get(TABLE, sizeof(KEY), &(KEY), sizeof(DATA), &(DATA)) + +#define _Py_HASHTABLE_POP(TABLE, KEY, DATA) \ + _Py_hashtable_pop(TABLE, sizeof(KEY), &(KEY), sizeof(DATA), &(DATA)) #endif /* Py_LIMITED_API */ diff --git a/Python/marshal.c b/Python/marshal.c index 7a4b9d29b4e..64084f47030 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -263,7 +263,7 @@ w_ref(PyObject *v, char *flag, WFILE *p) if (Py_REFCNT(v) == 1) return 0; - entry = _Py_hashtable_get_entry(p->hashtable, v); + entry = _Py_HASHTABLE_GET_ENTRY(p->hashtable, v); if (entry != NULL) { /* write the reference index to the stream */ _Py_HASHTABLE_ENTRY_READ_DATA(p->hashtable, &w, sizeof(w), entry); @@ -571,7 +571,8 @@ static int w_init_refs(WFILE *wf, int version) { if (version >= 3) { - wf->hashtable = _Py_hashtable_new(sizeof(int), _Py_hashtable_hash_ptr, + wf->hashtable = _Py_hashtable_new(sizeof(void *), sizeof(int), + _Py_hashtable_hash_ptr, _Py_hashtable_compare_direct); if (wf->hashtable == NULL) { PyErr_NoMemory(); @@ -582,9 +583,11 @@ w_init_refs(WFILE *wf, int version) } static int -w_decref_entry(_Py_hashtable_entry_t *entry, void *Py_UNUSED(data)) +w_decref_entry(_Py_hashtable_t *ht, _Py_hashtable_entry_t *entry, void *Py_UNUSED(data)) { - Py_XDECREF(entry->key); + void *entry_key = *(void **)_Py_HASHTABLE_ENTRY_KEY(entry); + assert(ht->key_size == sizeof(entry_key)); + Py_XDECREF(entry_key); return 0; }