From 7c6e97077525f0ad3cfa0971028313b9079449fd Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 12 May 2020 13:31:59 +0200 Subject: [PATCH] bpo-40602: Optimize _Py_hashtable for pointer keys (GH-20051) Optimize _Py_hashtable_get() and _Py_hashtable_get_entry() for pointer keys: * key_size == sizeof(void*) * hash_func == _Py_hashtable_hash_ptr * compare_func == _Py_hashtable_compare_direct Changes: * Add get_func and get_entry_func members to _Py_hashtable_t * Convert _Py_hashtable_get() and _Py_hashtable_get_entry() functions to static nline functions. * Add specialized get and get entry for pointer keys. --- Include/internal/pycore_hashtable.h | 40 ++++-- Python/hashtable.c | 209 +++++++++++++++++----------- 2 files changed, 154 insertions(+), 95 deletions(-) diff --git a/Include/internal/pycore_hashtable.h b/Include/internal/pycore_hashtable.h index 585f76b51d7..6e094e94376 100644 --- a/Include/internal/pycore_hashtable.h +++ b/Include/internal/pycore_hashtable.h @@ -76,12 +76,17 @@ typedef struct { /* Forward declaration */ struct _Py_hashtable_t; +typedef struct _Py_hashtable_t _Py_hashtable_t; -typedef Py_uhash_t (*_Py_hashtable_hash_func) (struct _Py_hashtable_t *ht, +typedef Py_uhash_t (*_Py_hashtable_hash_func) (_Py_hashtable_t *ht, const void *pkey); -typedef int (*_Py_hashtable_compare_func) (struct _Py_hashtable_t *ht, +typedef int (*_Py_hashtable_compare_func) (_Py_hashtable_t *ht, const void *pkey, const _Py_hashtable_entry_t *he); +typedef _Py_hashtable_entry_t* (*_Py_hashtable_get_entry_func)(_Py_hashtable_t *ht, + const void *pkey); +typedef int (*_Py_hashtable_get_func) (_Py_hashtable_t *ht, + const void *pkey, void *data); typedef struct { /* allocate a memory block */ @@ -93,18 +98,19 @@ typedef struct { /* _Py_hashtable: table */ - -typedef struct _Py_hashtable_t { +struct _Py_hashtable_t { size_t num_buckets; size_t entries; /* Total number of entries in the table. */ _Py_slist_t *buckets; size_t key_size; size_t data_size; + _Py_hashtable_get_func get_func; + _Py_hashtable_get_entry_func get_entry_func; _Py_hashtable_hash_func hash_func; _Py_hashtable_compare_func compare_func; _Py_hashtable_allocator_t alloc; -} _Py_hashtable_t; +}; /* hash a pointer (void*) */ PyAPI_FUNC(Py_uhash_t) _Py_hashtable_hash_ptr( @@ -176,10 +182,12 @@ PyAPI_FUNC(int) _Py_hashtable_set( Don't call directly this function, but use _Py_HASHTABLE_GET_ENTRY() macro */ -PyAPI_FUNC(_Py_hashtable_entry_t*) _Py_hashtable_get_entry( - _Py_hashtable_t *ht, - size_t key_size, - const void *pkey); +static inline _Py_hashtable_entry_t * +_Py_hashtable_get_entry(_Py_hashtable_t *ht, size_t key_size, const void *pkey) +{ + assert(key_size == ht->key_size); + return ht->get_entry_func(ht, pkey); +} #define _Py_HASHTABLE_GET_ENTRY(TABLE, KEY) \ _Py_hashtable_get_entry(TABLE, sizeof(KEY), &(KEY)) @@ -189,12 +197,14 @@ PyAPI_FUNC(_Py_hashtable_entry_t*) _Py_hashtable_get_entry( exists, return 0 if the entry does not exist. Don't call directly this function, but use _Py_HASHTABLE_GET() macro */ -PyAPI_FUNC(int) _Py_hashtable_get( - _Py_hashtable_t *ht, - size_t key_size, - const void *pkey, - size_t data_size, - void *data); +static inline int +_Py_hashtable_get(_Py_hashtable_t *ht, size_t key_size, const void *pkey, + size_t data_size, void *data) +{ + assert(key_size == ht->key_size); + assert(data_size == ht->data_size); + return ht->get_func(ht, pkey, data); +} #define _Py_HASHTABLE_GET(TABLE, KEY, DATA) \ _Py_hashtable_get(TABLE, sizeof(KEY), &(KEY), sizeof(DATA), &(DATA)) diff --git a/Python/hashtable.c b/Python/hashtable.c index e9f02d8650e..1548c2e4618 100644 --- a/Python/hashtable.c +++ b/Python/hashtable.c @@ -108,7 +108,6 @@ Py_uhash_t _Py_hashtable_hash_ptr(struct _Py_hashtable_t *ht, const void *pkey) { void *key; - _Py_HASHTABLE_READ_KEY(ht, pkey, key); return (Py_uhash_t)_Py_HashPointer(key); } @@ -137,61 +136,6 @@ round_size(size_t s) } -_Py_hashtable_t * -_Py_hashtable_new_full(size_t key_size, size_t data_size, - size_t init_size, - _Py_hashtable_hash_func hash_func, - _Py_hashtable_compare_func compare_func, - _Py_hashtable_allocator_t *allocator) -{ - _Py_hashtable_t *ht; - size_t buckets_size; - _Py_hashtable_allocator_t alloc; - - if (allocator == NULL) { - alloc.malloc = PyMem_Malloc; - alloc.free = PyMem_Free; - } - else { - alloc = *allocator; - } - - ht = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t)); - if (ht == NULL) - return ht; - - ht->num_buckets = round_size(init_size); - ht->entries = 0; - ht->key_size = key_size; - ht->data_size = data_size; - - buckets_size = ht->num_buckets * sizeof(ht->buckets[0]); - ht->buckets = alloc.malloc(buckets_size); - if (ht->buckets == NULL) { - alloc.free(ht); - return NULL; - } - memset(ht->buckets, 0, buckets_size); - - ht->hash_func = hash_func; - ht->compare_func = compare_func; - ht->alloc = alloc; - return ht; -} - - -_Py_hashtable_t * -_Py_hashtable_new(size_t key_size, size_t data_size, - _Py_hashtable_hash_func hash_func, - _Py_hashtable_compare_func compare_func) -{ - return _Py_hashtable_new_full(key_size, data_size, - HASHTABLE_MIN_SIZE, - hash_func, compare_func, - NULL); -} - - size_t _Py_hashtable_size(_Py_hashtable_t *ht) { @@ -251,23 +195,20 @@ _Py_hashtable_print_stats(_Py_hashtable_t *ht) _Py_hashtable_entry_t * -_Py_hashtable_get_entry(_Py_hashtable_t *ht, - size_t key_size, const void *pkey) +_Py_hashtable_get_entry_generic(_Py_hashtable_t *ht, const void *pkey) { - Py_uhash_t key_hash; - size_t index; - _Py_hashtable_entry_t *entry; - - assert(key_size == ht->key_size); - - key_hash = ht->hash_func(ht, pkey); - index = key_hash & (ht->num_buckets - 1); - - for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) { - if (entry->key_hash == key_hash && ht->compare_func(ht, pkey, entry)) + Py_uhash_t key_hash = ht->hash_func(ht, pkey); + size_t index = key_hash & (ht->num_buckets - 1); + _Py_hashtable_entry_t *entry = entry = TABLE_HEAD(ht, index); + while (1) { + if (entry == NULL) { + return NULL; + } + if (entry->key_hash == key_hash && ht->compare_func(ht, pkey, entry)) { break; + } + entry = ENTRY_NEXT(entry); } - return entry; } @@ -324,7 +265,7 @@ _Py_hashtable_set(_Py_hashtable_t *ht, size_t key_size, const void *pkey, /* Don't write the assertion on a single line because it is interesting to know the duplicated entry if the assertion failed. The entry can be read using a debugger. */ - entry = _Py_hashtable_get_entry(ht, key_size, pkey); + entry = ht->get_entry_func(ht, pkey); assert(entry == NULL); #endif @@ -352,18 +293,62 @@ _Py_hashtable_set(_Py_hashtable_t *ht, size_t key_size, const void *pkey, int -_Py_hashtable_get(_Py_hashtable_t *ht, size_t key_size,const void *pkey, - size_t data_size, void *data) +_Py_hashtable_get_generic(_Py_hashtable_t *ht, const void *pkey, void *data) { - _Py_hashtable_entry_t *entry; - assert(data != NULL); - - entry = _Py_hashtable_get_entry(ht, key_size, pkey); - if (entry == NULL) + _Py_hashtable_entry_t *entry = ht->get_entry_func(ht, pkey); + if (entry != NULL) { + ENTRY_READ_PDATA(ht, entry, ht->data_size, data); + return 1; + } + else { return 0; - ENTRY_READ_PDATA(ht, entry, data_size, data); - return 1; + } +} + + +// Specialized for: +// key_size == sizeof(void*) +// hash_func == _Py_hashtable_hash_ptr +// compare_func == _Py_hashtable_compare_direct +_Py_hashtable_entry_t * +_Py_hashtable_get_entry_ptr(_Py_hashtable_t *ht, const void *pkey) +{ + Py_uhash_t key_hash = _Py_hashtable_hash_ptr(ht, pkey); + size_t index = key_hash & (ht->num_buckets - 1); + _Py_hashtable_entry_t *entry = entry = TABLE_HEAD(ht, index); + while (1) { + if (entry == NULL) { + return NULL; + } + if (entry->key_hash == key_hash) { + const void *pkey2 = _Py_HASHTABLE_ENTRY_PKEY(entry); + if (memcmp(pkey, pkey2, sizeof(void*)) == 0) { + break; + } + } + entry = ENTRY_NEXT(entry); + } + return entry; +} + + +// Specialized for: +// key_size == sizeof(void*) +// hash_func == _Py_hashtable_hash_ptr +// compare_func == _Py_hashtable_compare_direct +int +_Py_hashtable_get_ptr(_Py_hashtable_t *ht, const void *pkey, void *data) +{ + assert(data != NULL); + _Py_hashtable_entry_t *entry = _Py_hashtable_get_entry_ptr(ht, pkey); + if (entry != NULL) { + ENTRY_READ_PDATA(ht, entry, ht->data_size, data); + return 1; + } + else { + return 0; + } } @@ -454,6 +439,70 @@ hashtable_rehash(_Py_hashtable_t *ht) } +_Py_hashtable_t * +_Py_hashtable_new_full(size_t key_size, size_t data_size, + size_t init_size, + _Py_hashtable_hash_func hash_func, + _Py_hashtable_compare_func compare_func, + _Py_hashtable_allocator_t *allocator) +{ + _Py_hashtable_t *ht; + size_t buckets_size; + _Py_hashtable_allocator_t alloc; + + if (allocator == NULL) { + alloc.malloc = PyMem_Malloc; + alloc.free = PyMem_Free; + } + else { + alloc = *allocator; + } + + ht = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t)); + if (ht == NULL) + return ht; + + ht->num_buckets = round_size(init_size); + ht->entries = 0; + ht->key_size = key_size; + ht->data_size = data_size; + + buckets_size = ht->num_buckets * sizeof(ht->buckets[0]); + ht->buckets = alloc.malloc(buckets_size); + if (ht->buckets == NULL) { + alloc.free(ht); + return NULL; + } + memset(ht->buckets, 0, buckets_size); + + ht->get_func = _Py_hashtable_get_generic; + ht->get_entry_func = _Py_hashtable_get_entry_generic; + ht->hash_func = hash_func; + ht->compare_func = compare_func; + ht->alloc = alloc; + if (ht->key_size == sizeof(void*) + && ht->hash_func == _Py_hashtable_hash_ptr + && ht->compare_func == _Py_hashtable_compare_direct) + { + ht->get_func = _Py_hashtable_get_ptr; + ht->get_entry_func = _Py_hashtable_get_entry_ptr; + } + return ht; +} + + +_Py_hashtable_t * +_Py_hashtable_new(size_t key_size, size_t data_size, + _Py_hashtable_hash_func hash_func, + _Py_hashtable_compare_func compare_func) +{ + return _Py_hashtable_new_full(key_size, data_size, + HASHTABLE_MIN_SIZE, + hash_func, compare_func, + NULL); +} + + void _Py_hashtable_clear(_Py_hashtable_t *ht) {