From a482dc500b6ec4889f6a126ba08cbad6c11e37bc Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 14 May 2020 21:55:47 +0200 Subject: [PATCH] bpo-40602: Write unit tests for _Py_hashtable_t (GH-20091) Cleanup also hashtable.c. Rename _Py_hashtable_t members: * Rename entries to nentries * Rename num_buckets to nbuckets --- Include/internal/pycore_hashtable.h | 23 ++-- Modules/_testinternalcapi.c | 88 ++++++++++++++++ Python/hashtable.c | 158 +++++++++------------------- Python/marshal.c | 2 +- 4 files changed, 151 insertions(+), 120 deletions(-) diff --git a/Include/internal/pycore_hashtable.h b/Include/internal/pycore_hashtable.h index 2990f9e0c1c..18757abc28c 100644 --- a/Include/internal/pycore_hashtable.h +++ b/Include/internal/pycore_hashtable.h @@ -48,18 +48,18 @@ typedef _Py_hashtable_entry_t* (*_Py_hashtable_get_entry_func)(_Py_hashtable_t * const void *key); typedef struct { - /* allocate a memory block */ + // Allocate a memory block void* (*malloc) (size_t size); - /* release a memory block */ + // Release a memory block void (*free) (void *ptr); } _Py_hashtable_allocator_t; /* _Py_hashtable: table */ struct _Py_hashtable_t { - size_t num_buckets; - size_t entries; /* Total number of entries in the table. */ + size_t nentries; // Total number of entries in the table + size_t nbuckets; _Py_slist_t *buckets; _Py_hashtable_get_entry_func get_entry_func; @@ -70,10 +70,10 @@ struct _Py_hashtable_t { _Py_hashtable_allocator_t alloc; }; -/* hash a pointer (void*) */ +/* Hash a pointer (void*) */ PyAPI_FUNC(Py_uhash_t) _Py_hashtable_hash_ptr(const void *key); -/* comparison using memcmp() */ +/* Comparison using memcmp() */ PyAPI_FUNC(int) _Py_hashtable_compare_direct( const void *key1, const void *key2); @@ -129,13 +129,14 @@ _Py_hashtable_get_entry(_Py_hashtable_t *ht, const void *key) Use _Py_hashtable_get_entry() to distinguish entry value equal to NULL and entry not found. */ -extern void *_Py_hashtable_get(_Py_hashtable_t *ht, const void *key); +PyAPI_FUNC(void*) _Py_hashtable_get(_Py_hashtable_t *ht, const void *key); -// Remove a key and its associated value without calling key and value destroy -// functions. -// Return the removed value if the key was found. -// Return NULL if the key was not found. +/* Remove a key and its associated value without calling key and value destroy + functions. + + Return the removed value if the key was found. + Return NULL if the key was not found. */ PyAPI_FUNC(void*) _Py_hashtable_steal( _Py_hashtable_t *ht, const void *key); diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 1b7563cb20f..3ae387d945d 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -14,6 +14,7 @@ #include "Python.h" #include "pycore_byteswap.h" // _Py_bswap32() #include "pycore_initconfig.h" // _Py_GetConfigsAsDict() +#include "pycore_hashtable.h" // _Py_hashtable_new() #include "pycore_gc.h" // PyGC_Head @@ -62,10 +63,97 @@ test_bswap(PyObject *self, PyObject *Py_UNUSED(args)) } +#define TO_PTR(ch) ((void*)(uintptr_t)ch) +#define FROM_PTR(ptr) ((uintptr_t)ptr) +#define VALUE(key) (1 + ((int)(key) - 'a')) + +static Py_uhash_t +hash_char(const void *key) +{ + char ch = (char)FROM_PTR(key); + return ch; +} + + +static int +hashtable_cb(_Py_hashtable_t *table, + const void *key_ptr, const void *value_ptr, + void *user_data) +{ + int *count = (int *)user_data; + char key = (char)FROM_PTR(key_ptr); + int value = (int)FROM_PTR(value_ptr); + assert(value == VALUE(key)); + *count += 1; + return 0; +} + + +static PyObject* +test_hashtable(PyObject *self, PyObject *Py_UNUSED(args)) +{ + _Py_hashtable_t *table = _Py_hashtable_new(hash_char, + _Py_hashtable_compare_direct); + if (table == NULL) { + return PyErr_NoMemory(); + } + + // Test _Py_hashtable_set() + char key; + for (key='a'; key <= 'z'; key++) { + int value = VALUE(key); + if (_Py_hashtable_set(table, TO_PTR(key), TO_PTR(value)) < 0) { + _Py_hashtable_destroy(table); + return PyErr_NoMemory(); + } + } + assert(table->nentries == 26); + assert(table->nbuckets > table->nentries); + + // Test _Py_hashtable_get_entry() + for (key='a'; key <= 'z'; key++) { + _Py_hashtable_entry_t *entry = _Py_hashtable_get_entry(table, TO_PTR(key)); + assert(entry != NULL); + assert(entry->key = TO_PTR(key)); + assert(entry->value = TO_PTR(VALUE(key))); + } + + // Test _Py_hashtable_get() + for (key='a'; key <= 'z'; key++) { + void *value_ptr = _Py_hashtable_get(table, TO_PTR(key)); + int value = (int)FROM_PTR(value_ptr); + assert(value == VALUE(key)); + } + + // Test _Py_hashtable_steal() + key = 'p'; + void *value_ptr = _Py_hashtable_steal(table, TO_PTR(key)); + int value = (int)FROM_PTR(value_ptr); + assert(value == VALUE(key)); + + assert(table->nentries == 25); + + // Test _Py_hashtable_foreach() + int count = 0; + int res = _Py_hashtable_foreach(table, hashtable_cb, &count); + assert(res == 0); + assert(count == 25); + + // Test _Py_hashtable_clear() + _Py_hashtable_clear(table); + assert(table->nentries == 0); + assert(_Py_hashtable_get(table, TO_PTR('x')) == NULL); + + _Py_hashtable_destroy(table); + Py_RETURN_NONE; +} + + static PyMethodDef TestMethods[] = { {"get_configs", get_configs, METH_NOARGS}, {"get_recursion_depth", get_recursion_depth, METH_NOARGS}, {"test_bswap", test_bswap, METH_NOARGS}, + {"test_hashtable", test_hashtable, METH_NOARGS}, {NULL, NULL} /* sentinel */ }; diff --git a/Python/hashtable.c b/Python/hashtable.c index d1467ad94ed..45c52859ac2 100644 --- a/Python/hashtable.c +++ b/Python/hashtable.c @@ -119,66 +119,20 @@ round_size(size_t s) size_t _Py_hashtable_size(const _Py_hashtable_t *ht) { - size_t size; - - size = sizeof(_Py_hashtable_t); - + size_t size = sizeof(_Py_hashtable_t); /* buckets */ - size += ht->num_buckets * sizeof(_Py_hashtable_entry_t *); - + size += ht->nbuckets * sizeof(_Py_hashtable_entry_t *); /* entries */ - size += ht->entries * sizeof(_Py_hashtable_entry_t); - + size += ht->nentries * sizeof(_Py_hashtable_entry_t); return size; } -#ifdef Py_DEBUG -void -_Py_hashtable_print_stats(_Py_hashtable_t *ht) -{ - size_t size; - size_t chain_len, max_chain_len, total_chain_len, nchains; - _Py_hashtable_entry_t *entry; - size_t hv; - double load; - - size = _Py_hashtable_size(ht); - - load = (double)ht->entries / ht->num_buckets; - - max_chain_len = 0; - total_chain_len = 0; - nchains = 0; - for (hv = 0; hv < ht->num_buckets; hv++) { - entry = TABLE_HEAD(ht, hv); - if (entry != NULL) { - chain_len = 0; - for (; entry; entry = ENTRY_NEXT(entry)) { - chain_len++; - } - if (chain_len > max_chain_len) - max_chain_len = chain_len; - total_chain_len += chain_len; - nchains++; - } - } - printf("hash table %p: entries=%" - PY_FORMAT_SIZE_T "u/%" PY_FORMAT_SIZE_T "u (%.0f%%), ", - (void *)ht, ht->entries, ht->num_buckets, load * 100.0); - if (nchains) - printf("avg_chain_len=%.1f, ", (double)total_chain_len / nchains); - printf("max_chain_len=%" PY_FORMAT_SIZE_T "u, %" PY_FORMAT_SIZE_T "u KiB\n", - max_chain_len, size / 1024); -} -#endif - - _Py_hashtable_entry_t * _Py_hashtable_get_entry_generic(_Py_hashtable_t *ht, const void *key) { Py_uhash_t key_hash = ht->hash_func(key); - size_t index = key_hash & (ht->num_buckets - 1); + size_t index = key_hash & (ht->nbuckets - 1); _Py_hashtable_entry_t *entry = entry = TABLE_HEAD(ht, index); while (1) { if (entry == NULL) { @@ -200,7 +154,7 @@ static _Py_hashtable_entry_t * _Py_hashtable_get_entry_ptr(_Py_hashtable_t *ht, const void *key) { Py_uhash_t key_hash = _Py_hashtable_hash_ptr(key); - size_t index = key_hash & (ht->num_buckets - 1); + size_t index = key_hash & (ht->nbuckets - 1); _Py_hashtable_entry_t *entry = entry = TABLE_HEAD(ht, index); while (1) { if (entry == NULL) { @@ -220,7 +174,7 @@ void* _Py_hashtable_steal(_Py_hashtable_t *ht, const void *key) { Py_uhash_t key_hash = ht->hash_func(key); - size_t index = key_hash & (ht->num_buckets - 1); + size_t index = key_hash & (ht->nbuckets - 1); _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, index); _Py_hashtable_entry_t *previous = NULL; @@ -238,12 +192,12 @@ _Py_hashtable_steal(_Py_hashtable_t *ht, const void *key) _Py_slist_remove(&ht->buckets[index], (_Py_slist_item_t *)previous, (_Py_slist_item_t *)entry); - ht->entries--; + ht->nentries--; void *value = entry->value; ht->alloc.free(entry); - if ((float)ht->entries / (float)ht->num_buckets < HASHTABLE_LOW) { + if ((float)ht->nentries / (float)ht->nbuckets < HASHTABLE_LOW) { hashtable_rehash(ht); } return value; @@ -263,8 +217,6 @@ _Py_hashtable_set(_Py_hashtable_t *ht, const void *key, void *value) assert(entry == NULL); #endif - Py_uhash_t key_hash = ht->hash_func(key); - size_t index = key_hash & (ht->num_buckets - 1); entry = ht->alloc.malloc(sizeof(_Py_hashtable_entry_t)); if (entry == NULL) { @@ -272,15 +224,17 @@ _Py_hashtable_set(_Py_hashtable_t *ht, const void *key, void *value) return -1; } - entry->key_hash = key_hash; + entry->key_hash = ht->hash_func(key); entry->key = (void *)key; entry->value = value; + size_t index = entry->key_hash & (ht->nbuckets - 1); _Py_slist_prepend(&ht->buckets[index], (_Py_slist_item_t*)entry); - ht->entries++; + ht->nentries++; - if ((float)ht->entries / (float)ht->num_buckets > HASHTABLE_HIGH) + if ((float)ht->nentries / (float)ht->nbuckets > HASHTABLE_HIGH) { hashtable_rehash(ht); + } return 0; } @@ -303,14 +257,14 @@ _Py_hashtable_foreach(_Py_hashtable_t *ht, _Py_hashtable_foreach_func func, void *user_data) { - _Py_hashtable_entry_t *entry; - size_t hv; - - for (hv = 0; hv < ht->num_buckets; hv++) { - for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) { + for (size_t hv = 0; hv < ht->nbuckets; hv++) { + _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, hv); + while (entry != NULL) { int res = func(ht, entry->key, entry->value, user_data); - if (res) + if (res) { return res; + } + entry = ENTRY_NEXT(entry); } } return 0; @@ -320,44 +274,35 @@ _Py_hashtable_foreach(_Py_hashtable_t *ht, static void hashtable_rehash(_Py_hashtable_t *ht) { - size_t buckets_size, new_size, bucket; - _Py_slist_t *old_buckets = NULL; - size_t old_num_buckets; - - new_size = round_size((size_t)(ht->entries * HASHTABLE_REHASH_FACTOR)); - if (new_size == ht->num_buckets) + size_t new_size = round_size((size_t)(ht->nentries * HASHTABLE_REHASH_FACTOR)); + if (new_size == ht->nbuckets) { return; + } - old_num_buckets = ht->num_buckets; - - buckets_size = new_size * sizeof(ht->buckets[0]); - old_buckets = ht->buckets; - ht->buckets = ht->alloc.malloc(buckets_size); - if (ht->buckets == NULL) { - /* cancel rehash on memory allocation failure */ - ht->buckets = old_buckets ; + size_t buckets_size = new_size * sizeof(ht->buckets[0]); + _Py_slist_t *new_buckets = ht->alloc.malloc(buckets_size); + if (new_buckets == NULL) { /* memory allocation failed */ return; } - memset(ht->buckets, 0, buckets_size); - - ht->num_buckets = new_size; - - for (bucket = 0; bucket < old_num_buckets; bucket++) { - _Py_hashtable_entry_t *entry, *next; - for (entry = BUCKETS_HEAD(old_buckets[bucket]); entry != NULL; entry = next) { - size_t entry_index; - + memset(new_buckets, 0, buckets_size); + for (size_t bucket = 0; bucket < ht->nbuckets; bucket++) { + _Py_hashtable_entry_t *entry = BUCKETS_HEAD(ht->buckets[bucket]); + while (entry != NULL) { assert(ht->hash_func(entry->key) == entry->key_hash); - next = ENTRY_NEXT(entry); - entry_index = entry->key_hash & (new_size - 1); + _Py_hashtable_entry_t *next = ENTRY_NEXT(entry); + size_t entry_index = entry->key_hash & (new_size - 1); - _Py_slist_prepend(&ht->buckets[entry_index], (_Py_slist_item_t*)entry); + _Py_slist_prepend(&new_buckets[entry_index], (_Py_slist_item_t*)entry); + + entry = next; } } - ht->alloc.free(old_buckets); + ht->alloc.free(ht->buckets); + ht->nbuckets = new_size; + ht->buckets = new_buckets; } @@ -368,10 +313,7 @@ _Py_hashtable_new_full(_Py_hashtable_hash_func hash_func, _Py_hashtable_destroy_func value_destroy_func, _Py_hashtable_allocator_t *allocator) { - _Py_hashtable_t *ht; - size_t buckets_size; _Py_hashtable_allocator_t alloc; - if (allocator == NULL) { alloc.malloc = PyMem_Malloc; alloc.free = PyMem_Free; @@ -380,14 +322,15 @@ _Py_hashtable_new_full(_Py_hashtable_hash_func hash_func, alloc = *allocator; } - ht = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t)); - if (ht == NULL) + _Py_hashtable_t *ht = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t)); + if (ht == NULL) { return ht; + } - ht->num_buckets = HASHTABLE_MIN_SIZE; - ht->entries = 0; + ht->nbuckets = HASHTABLE_MIN_SIZE; + ht->nentries = 0; - buckets_size = ht->num_buckets * sizeof(ht->buckets[0]); + size_t buckets_size = ht->nbuckets * sizeof(ht->buckets[0]); ht->buckets = alloc.malloc(buckets_size); if (ht->buckets == NULL) { alloc.free(ht); @@ -435,17 +378,16 @@ _Py_hashtable_destroy_entry(_Py_hashtable_t *ht, _Py_hashtable_entry_t *entry) void _Py_hashtable_clear(_Py_hashtable_t *ht) { - _Py_hashtable_entry_t *entry, *next; - size_t i; - - for (i=0; i < ht->num_buckets; i++) { - for (entry = TABLE_HEAD(ht, i); entry != NULL; entry = next) { - next = ENTRY_NEXT(entry); + for (size_t i=0; i < ht->nbuckets; i++) { + _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, i); + while (entry != NULL) { + _Py_hashtable_entry_t *next = ENTRY_NEXT(entry); _Py_hashtable_destroy_entry(ht, entry); + entry = next; } _Py_slist_init(&ht->buckets[i]); } - ht->entries = 0; + ht->nentries = 0; hashtable_rehash(ht); } @@ -453,7 +395,7 @@ _Py_hashtable_clear(_Py_hashtable_t *ht) void _Py_hashtable_destroy(_Py_hashtable_t *ht) { - for (size_t i = 0; i < ht->num_buckets; i++) { + for (size_t i = 0; i < ht->nbuckets; i++) { _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, i); while (entry) { _Py_hashtable_entry_t *entry_next = ENTRY_NEXT(entry); diff --git a/Python/marshal.c b/Python/marshal.c index b096ff89322..a0f6b981260 100644 --- a/Python/marshal.c +++ b/Python/marshal.c @@ -312,7 +312,7 @@ w_ref(PyObject *v, char *flag, WFILE *p) w_long(w, p); return 1; } else { - size_t s = p->hashtable->entries; + size_t s = p->hashtable->nentries; /* we don't support long indices */ if (s >= 0x7fffffff) { PyErr_SetString(PyExc_ValueError, "too many objects");