bpo-40602: Write unit tests for _Py_hashtable_t (GH-20091)
Cleanup also hashtable.c. Rename _Py_hashtable_t members: * Rename entries to nentries * Rename num_buckets to nbuckets
This commit is contained in:
parent
f2c3b6823b
commit
a482dc500b
|
@ -48,18 +48,18 @@ typedef _Py_hashtable_entry_t* (*_Py_hashtable_get_entry_func)(_Py_hashtable_t *
|
|||
const void *key);
|
||||
|
||||
typedef struct {
|
||||
/* allocate a memory block */
|
||||
// Allocate a memory block
|
||||
void* (*malloc) (size_t size);
|
||||
|
||||
/* release a memory block */
|
||||
// Release a memory block
|
||||
void (*free) (void *ptr);
|
||||
} _Py_hashtable_allocator_t;
|
||||
|
||||
|
||||
/* _Py_hashtable: table */
|
||||
struct _Py_hashtable_t {
|
||||
size_t num_buckets;
|
||||
size_t entries; /* Total number of entries in the table. */
|
||||
size_t nentries; // Total number of entries in the table
|
||||
size_t nbuckets;
|
||||
_Py_slist_t *buckets;
|
||||
|
||||
_Py_hashtable_get_entry_func get_entry_func;
|
||||
|
@ -70,10 +70,10 @@ struct _Py_hashtable_t {
|
|||
_Py_hashtable_allocator_t alloc;
|
||||
};
|
||||
|
||||
/* hash a pointer (void*) */
|
||||
/* Hash a pointer (void*) */
|
||||
PyAPI_FUNC(Py_uhash_t) _Py_hashtable_hash_ptr(const void *key);
|
||||
|
||||
/* comparison using memcmp() */
|
||||
/* Comparison using memcmp() */
|
||||
PyAPI_FUNC(int) _Py_hashtable_compare_direct(
|
||||
const void *key1,
|
||||
const void *key2);
|
||||
|
@ -129,13 +129,14 @@ _Py_hashtable_get_entry(_Py_hashtable_t *ht, const void *key)
|
|||
|
||||
Use _Py_hashtable_get_entry() to distinguish entry value equal to NULL
|
||||
and entry not found. */
|
||||
extern void *_Py_hashtable_get(_Py_hashtable_t *ht, const void *key);
|
||||
PyAPI_FUNC(void*) _Py_hashtable_get(_Py_hashtable_t *ht, const void *key);
|
||||
|
||||
|
||||
// Remove a key and its associated value without calling key and value destroy
|
||||
// functions.
|
||||
// Return the removed value if the key was found.
|
||||
// Return NULL if the key was not found.
|
||||
/* Remove a key and its associated value without calling key and value destroy
|
||||
functions.
|
||||
|
||||
Return the removed value if the key was found.
|
||||
Return NULL if the key was not found. */
|
||||
PyAPI_FUNC(void*) _Py_hashtable_steal(
|
||||
_Py_hashtable_t *ht,
|
||||
const void *key);
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
#include "Python.h"
|
||||
#include "pycore_byteswap.h" // _Py_bswap32()
|
||||
#include "pycore_initconfig.h" // _Py_GetConfigsAsDict()
|
||||
#include "pycore_hashtable.h" // _Py_hashtable_new()
|
||||
#include "pycore_gc.h" // PyGC_Head
|
||||
|
||||
|
||||
|
@ -62,10 +63,97 @@ test_bswap(PyObject *self, PyObject *Py_UNUSED(args))
|
|||
}
|
||||
|
||||
|
||||
#define TO_PTR(ch) ((void*)(uintptr_t)ch)
|
||||
#define FROM_PTR(ptr) ((uintptr_t)ptr)
|
||||
#define VALUE(key) (1 + ((int)(key) - 'a'))
|
||||
|
||||
static Py_uhash_t
|
||||
hash_char(const void *key)
|
||||
{
|
||||
char ch = (char)FROM_PTR(key);
|
||||
return ch;
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
hashtable_cb(_Py_hashtable_t *table,
|
||||
const void *key_ptr, const void *value_ptr,
|
||||
void *user_data)
|
||||
{
|
||||
int *count = (int *)user_data;
|
||||
char key = (char)FROM_PTR(key_ptr);
|
||||
int value = (int)FROM_PTR(value_ptr);
|
||||
assert(value == VALUE(key));
|
||||
*count += 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static PyObject*
|
||||
test_hashtable(PyObject *self, PyObject *Py_UNUSED(args))
|
||||
{
|
||||
_Py_hashtable_t *table = _Py_hashtable_new(hash_char,
|
||||
_Py_hashtable_compare_direct);
|
||||
if (table == NULL) {
|
||||
return PyErr_NoMemory();
|
||||
}
|
||||
|
||||
// Test _Py_hashtable_set()
|
||||
char key;
|
||||
for (key='a'; key <= 'z'; key++) {
|
||||
int value = VALUE(key);
|
||||
if (_Py_hashtable_set(table, TO_PTR(key), TO_PTR(value)) < 0) {
|
||||
_Py_hashtable_destroy(table);
|
||||
return PyErr_NoMemory();
|
||||
}
|
||||
}
|
||||
assert(table->nentries == 26);
|
||||
assert(table->nbuckets > table->nentries);
|
||||
|
||||
// Test _Py_hashtable_get_entry()
|
||||
for (key='a'; key <= 'z'; key++) {
|
||||
_Py_hashtable_entry_t *entry = _Py_hashtable_get_entry(table, TO_PTR(key));
|
||||
assert(entry != NULL);
|
||||
assert(entry->key = TO_PTR(key));
|
||||
assert(entry->value = TO_PTR(VALUE(key)));
|
||||
}
|
||||
|
||||
// Test _Py_hashtable_get()
|
||||
for (key='a'; key <= 'z'; key++) {
|
||||
void *value_ptr = _Py_hashtable_get(table, TO_PTR(key));
|
||||
int value = (int)FROM_PTR(value_ptr);
|
||||
assert(value == VALUE(key));
|
||||
}
|
||||
|
||||
// Test _Py_hashtable_steal()
|
||||
key = 'p';
|
||||
void *value_ptr = _Py_hashtable_steal(table, TO_PTR(key));
|
||||
int value = (int)FROM_PTR(value_ptr);
|
||||
assert(value == VALUE(key));
|
||||
|
||||
assert(table->nentries == 25);
|
||||
|
||||
// Test _Py_hashtable_foreach()
|
||||
int count = 0;
|
||||
int res = _Py_hashtable_foreach(table, hashtable_cb, &count);
|
||||
assert(res == 0);
|
||||
assert(count == 25);
|
||||
|
||||
// Test _Py_hashtable_clear()
|
||||
_Py_hashtable_clear(table);
|
||||
assert(table->nentries == 0);
|
||||
assert(_Py_hashtable_get(table, TO_PTR('x')) == NULL);
|
||||
|
||||
_Py_hashtable_destroy(table);
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
|
||||
static PyMethodDef TestMethods[] = {
|
||||
{"get_configs", get_configs, METH_NOARGS},
|
||||
{"get_recursion_depth", get_recursion_depth, METH_NOARGS},
|
||||
{"test_bswap", test_bswap, METH_NOARGS},
|
||||
{"test_hashtable", test_hashtable, METH_NOARGS},
|
||||
{NULL, NULL} /* sentinel */
|
||||
};
|
||||
|
||||
|
|
|
@ -119,66 +119,20 @@ round_size(size_t s)
|
|||
size_t
|
||||
_Py_hashtable_size(const _Py_hashtable_t *ht)
|
||||
{
|
||||
size_t size;
|
||||
|
||||
size = sizeof(_Py_hashtable_t);
|
||||
|
||||
size_t size = sizeof(_Py_hashtable_t);
|
||||
/* buckets */
|
||||
size += ht->num_buckets * sizeof(_Py_hashtable_entry_t *);
|
||||
|
||||
size += ht->nbuckets * sizeof(_Py_hashtable_entry_t *);
|
||||
/* entries */
|
||||
size += ht->entries * sizeof(_Py_hashtable_entry_t);
|
||||
|
||||
size += ht->nentries * sizeof(_Py_hashtable_entry_t);
|
||||
return size;
|
||||
}
|
||||
|
||||
|
||||
#ifdef Py_DEBUG
|
||||
void
|
||||
_Py_hashtable_print_stats(_Py_hashtable_t *ht)
|
||||
{
|
||||
size_t size;
|
||||
size_t chain_len, max_chain_len, total_chain_len, nchains;
|
||||
_Py_hashtable_entry_t *entry;
|
||||
size_t hv;
|
||||
double load;
|
||||
|
||||
size = _Py_hashtable_size(ht);
|
||||
|
||||
load = (double)ht->entries / ht->num_buckets;
|
||||
|
||||
max_chain_len = 0;
|
||||
total_chain_len = 0;
|
||||
nchains = 0;
|
||||
for (hv = 0; hv < ht->num_buckets; hv++) {
|
||||
entry = TABLE_HEAD(ht, hv);
|
||||
if (entry != NULL) {
|
||||
chain_len = 0;
|
||||
for (; entry; entry = ENTRY_NEXT(entry)) {
|
||||
chain_len++;
|
||||
}
|
||||
if (chain_len > max_chain_len)
|
||||
max_chain_len = chain_len;
|
||||
total_chain_len += chain_len;
|
||||
nchains++;
|
||||
}
|
||||
}
|
||||
printf("hash table %p: entries=%"
|
||||
PY_FORMAT_SIZE_T "u/%" PY_FORMAT_SIZE_T "u (%.0f%%), ",
|
||||
(void *)ht, ht->entries, ht->num_buckets, load * 100.0);
|
||||
if (nchains)
|
||||
printf("avg_chain_len=%.1f, ", (double)total_chain_len / nchains);
|
||||
printf("max_chain_len=%" PY_FORMAT_SIZE_T "u, %" PY_FORMAT_SIZE_T "u KiB\n",
|
||||
max_chain_len, size / 1024);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
_Py_hashtable_entry_t *
|
||||
_Py_hashtable_get_entry_generic(_Py_hashtable_t *ht, const void *key)
|
||||
{
|
||||
Py_uhash_t key_hash = ht->hash_func(key);
|
||||
size_t index = key_hash & (ht->num_buckets - 1);
|
||||
size_t index = key_hash & (ht->nbuckets - 1);
|
||||
_Py_hashtable_entry_t *entry = entry = TABLE_HEAD(ht, index);
|
||||
while (1) {
|
||||
if (entry == NULL) {
|
||||
|
@ -200,7 +154,7 @@ static _Py_hashtable_entry_t *
|
|||
_Py_hashtable_get_entry_ptr(_Py_hashtable_t *ht, const void *key)
|
||||
{
|
||||
Py_uhash_t key_hash = _Py_hashtable_hash_ptr(key);
|
||||
size_t index = key_hash & (ht->num_buckets - 1);
|
||||
size_t index = key_hash & (ht->nbuckets - 1);
|
||||
_Py_hashtable_entry_t *entry = entry = TABLE_HEAD(ht, index);
|
||||
while (1) {
|
||||
if (entry == NULL) {
|
||||
|
@ -220,7 +174,7 @@ void*
|
|||
_Py_hashtable_steal(_Py_hashtable_t *ht, const void *key)
|
||||
{
|
||||
Py_uhash_t key_hash = ht->hash_func(key);
|
||||
size_t index = key_hash & (ht->num_buckets - 1);
|
||||
size_t index = key_hash & (ht->nbuckets - 1);
|
||||
|
||||
_Py_hashtable_entry_t *entry = TABLE_HEAD(ht, index);
|
||||
_Py_hashtable_entry_t *previous = NULL;
|
||||
|
@ -238,12 +192,12 @@ _Py_hashtable_steal(_Py_hashtable_t *ht, const void *key)
|
|||
|
||||
_Py_slist_remove(&ht->buckets[index], (_Py_slist_item_t *)previous,
|
||||
(_Py_slist_item_t *)entry);
|
||||
ht->entries--;
|
||||
ht->nentries--;
|
||||
|
||||
void *value = entry->value;
|
||||
ht->alloc.free(entry);
|
||||
|
||||
if ((float)ht->entries / (float)ht->num_buckets < HASHTABLE_LOW) {
|
||||
if ((float)ht->nentries / (float)ht->nbuckets < HASHTABLE_LOW) {
|
||||
hashtable_rehash(ht);
|
||||
}
|
||||
return value;
|
||||
|
@ -263,8 +217,6 @@ _Py_hashtable_set(_Py_hashtable_t *ht, const void *key, void *value)
|
|||
assert(entry == NULL);
|
||||
#endif
|
||||
|
||||
Py_uhash_t key_hash = ht->hash_func(key);
|
||||
size_t index = key_hash & (ht->num_buckets - 1);
|
||||
|
||||
entry = ht->alloc.malloc(sizeof(_Py_hashtable_entry_t));
|
||||
if (entry == NULL) {
|
||||
|
@ -272,15 +224,17 @@ _Py_hashtable_set(_Py_hashtable_t *ht, const void *key, void *value)
|
|||
return -1;
|
||||
}
|
||||
|
||||
entry->key_hash = key_hash;
|
||||
entry->key_hash = ht->hash_func(key);
|
||||
entry->key = (void *)key;
|
||||
entry->value = value;
|
||||
|
||||
size_t index = entry->key_hash & (ht->nbuckets - 1);
|
||||
_Py_slist_prepend(&ht->buckets[index], (_Py_slist_item_t*)entry);
|
||||
ht->entries++;
|
||||
ht->nentries++;
|
||||
|
||||
if ((float)ht->entries / (float)ht->num_buckets > HASHTABLE_HIGH)
|
||||
if ((float)ht->nentries / (float)ht->nbuckets > HASHTABLE_HIGH) {
|
||||
hashtable_rehash(ht);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -303,15 +257,15 @@ _Py_hashtable_foreach(_Py_hashtable_t *ht,
|
|||
_Py_hashtable_foreach_func func,
|
||||
void *user_data)
|
||||
{
|
||||
_Py_hashtable_entry_t *entry;
|
||||
size_t hv;
|
||||
|
||||
for (hv = 0; hv < ht->num_buckets; hv++) {
|
||||
for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) {
|
||||
for (size_t hv = 0; hv < ht->nbuckets; hv++) {
|
||||
_Py_hashtable_entry_t *entry = TABLE_HEAD(ht, hv);
|
||||
while (entry != NULL) {
|
||||
int res = func(ht, entry->key, entry->value, user_data);
|
||||
if (res)
|
||||
if (res) {
|
||||
return res;
|
||||
}
|
||||
entry = ENTRY_NEXT(entry);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
@ -320,44 +274,35 @@ _Py_hashtable_foreach(_Py_hashtable_t *ht,
|
|||
static void
|
||||
hashtable_rehash(_Py_hashtable_t *ht)
|
||||
{
|
||||
size_t buckets_size, new_size, bucket;
|
||||
_Py_slist_t *old_buckets = NULL;
|
||||
size_t old_num_buckets;
|
||||
|
||||
new_size = round_size((size_t)(ht->entries * HASHTABLE_REHASH_FACTOR));
|
||||
if (new_size == ht->num_buckets)
|
||||
size_t new_size = round_size((size_t)(ht->nentries * HASHTABLE_REHASH_FACTOR));
|
||||
if (new_size == ht->nbuckets) {
|
||||
return;
|
||||
}
|
||||
|
||||
old_num_buckets = ht->num_buckets;
|
||||
|
||||
buckets_size = new_size * sizeof(ht->buckets[0]);
|
||||
old_buckets = ht->buckets;
|
||||
ht->buckets = ht->alloc.malloc(buckets_size);
|
||||
if (ht->buckets == NULL) {
|
||||
/* cancel rehash on memory allocation failure */
|
||||
ht->buckets = old_buckets ;
|
||||
size_t buckets_size = new_size * sizeof(ht->buckets[0]);
|
||||
_Py_slist_t *new_buckets = ht->alloc.malloc(buckets_size);
|
||||
if (new_buckets == NULL) {
|
||||
/* memory allocation failed */
|
||||
return;
|
||||
}
|
||||
memset(ht->buckets, 0, buckets_size);
|
||||
|
||||
ht->num_buckets = new_size;
|
||||
|
||||
for (bucket = 0; bucket < old_num_buckets; bucket++) {
|
||||
_Py_hashtable_entry_t *entry, *next;
|
||||
for (entry = BUCKETS_HEAD(old_buckets[bucket]); entry != NULL; entry = next) {
|
||||
size_t entry_index;
|
||||
|
||||
memset(new_buckets, 0, buckets_size);
|
||||
|
||||
for (size_t bucket = 0; bucket < ht->nbuckets; bucket++) {
|
||||
_Py_hashtable_entry_t *entry = BUCKETS_HEAD(ht->buckets[bucket]);
|
||||
while (entry != NULL) {
|
||||
assert(ht->hash_func(entry->key) == entry->key_hash);
|
||||
next = ENTRY_NEXT(entry);
|
||||
entry_index = entry->key_hash & (new_size - 1);
|
||||
_Py_hashtable_entry_t *next = ENTRY_NEXT(entry);
|
||||
size_t entry_index = entry->key_hash & (new_size - 1);
|
||||
|
||||
_Py_slist_prepend(&ht->buckets[entry_index], (_Py_slist_item_t*)entry);
|
||||
_Py_slist_prepend(&new_buckets[entry_index], (_Py_slist_item_t*)entry);
|
||||
|
||||
entry = next;
|
||||
}
|
||||
}
|
||||
|
||||
ht->alloc.free(old_buckets);
|
||||
ht->alloc.free(ht->buckets);
|
||||
ht->nbuckets = new_size;
|
||||
ht->buckets = new_buckets;
|
||||
}
|
||||
|
||||
|
||||
|
@ -368,10 +313,7 @@ _Py_hashtable_new_full(_Py_hashtable_hash_func hash_func,
|
|||
_Py_hashtable_destroy_func value_destroy_func,
|
||||
_Py_hashtable_allocator_t *allocator)
|
||||
{
|
||||
_Py_hashtable_t *ht;
|
||||
size_t buckets_size;
|
||||
_Py_hashtable_allocator_t alloc;
|
||||
|
||||
if (allocator == NULL) {
|
||||
alloc.malloc = PyMem_Malloc;
|
||||
alloc.free = PyMem_Free;
|
||||
|
@ -380,14 +322,15 @@ _Py_hashtable_new_full(_Py_hashtable_hash_func hash_func,
|
|||
alloc = *allocator;
|
||||
}
|
||||
|
||||
ht = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t));
|
||||
if (ht == NULL)
|
||||
_Py_hashtable_t *ht = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t));
|
||||
if (ht == NULL) {
|
||||
return ht;
|
||||
}
|
||||
|
||||
ht->num_buckets = HASHTABLE_MIN_SIZE;
|
||||
ht->entries = 0;
|
||||
ht->nbuckets = HASHTABLE_MIN_SIZE;
|
||||
ht->nentries = 0;
|
||||
|
||||
buckets_size = ht->num_buckets * sizeof(ht->buckets[0]);
|
||||
size_t buckets_size = ht->nbuckets * sizeof(ht->buckets[0]);
|
||||
ht->buckets = alloc.malloc(buckets_size);
|
||||
if (ht->buckets == NULL) {
|
||||
alloc.free(ht);
|
||||
|
@ -435,17 +378,16 @@ _Py_hashtable_destroy_entry(_Py_hashtable_t *ht, _Py_hashtable_entry_t *entry)
|
|||
void
|
||||
_Py_hashtable_clear(_Py_hashtable_t *ht)
|
||||
{
|
||||
_Py_hashtable_entry_t *entry, *next;
|
||||
size_t i;
|
||||
|
||||
for (i=0; i < ht->num_buckets; i++) {
|
||||
for (entry = TABLE_HEAD(ht, i); entry != NULL; entry = next) {
|
||||
next = ENTRY_NEXT(entry);
|
||||
for (size_t i=0; i < ht->nbuckets; i++) {
|
||||
_Py_hashtable_entry_t *entry = TABLE_HEAD(ht, i);
|
||||
while (entry != NULL) {
|
||||
_Py_hashtable_entry_t *next = ENTRY_NEXT(entry);
|
||||
_Py_hashtable_destroy_entry(ht, entry);
|
||||
entry = next;
|
||||
}
|
||||
_Py_slist_init(&ht->buckets[i]);
|
||||
}
|
||||
ht->entries = 0;
|
||||
ht->nentries = 0;
|
||||
hashtable_rehash(ht);
|
||||
}
|
||||
|
||||
|
@ -453,7 +395,7 @@ _Py_hashtable_clear(_Py_hashtable_t *ht)
|
|||
void
|
||||
_Py_hashtable_destroy(_Py_hashtable_t *ht)
|
||||
{
|
||||
for (size_t i = 0; i < ht->num_buckets; i++) {
|
||||
for (size_t i = 0; i < ht->nbuckets; i++) {
|
||||
_Py_hashtable_entry_t *entry = TABLE_HEAD(ht, i);
|
||||
while (entry) {
|
||||
_Py_hashtable_entry_t *entry_next = ENTRY_NEXT(entry);
|
||||
|
|
|
@ -312,7 +312,7 @@ w_ref(PyObject *v, char *flag, WFILE *p)
|
|||
w_long(w, p);
|
||||
return 1;
|
||||
} else {
|
||||
size_t s = p->hashtable->entries;
|
||||
size_t s = p->hashtable->nentries;
|
||||
/* we don't support long indices */
|
||||
if (s >= 0x7fffffff) {
|
||||
PyErr_SetString(PyExc_ValueError, "too many objects");
|
||||
|
|
Loading…
Reference in New Issue