mirror of https://github.com/python/cpython
519 lines
15 KiB
C
519 lines
15 KiB
C
/* The implementation of the hash table (_Py_hashtable_t) is based on the cfuhash
   project:
   http://sourceforge.net/projects/libcfu/

   Copyright of cfuhash:
   ----------------------------------
   Creation date: 2005-06-24 21:22:40
   Authors: Don
   Change log:

   Copyright (c) 2005 Don Owens
   All rights reserved.

   This code is released under the BSD license:

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

     * Redistributions of source code must retain the above copyright
       notice, this list of conditions and the following disclaimer.

     * Redistributions in binary form must reproduce the above
       copyright notice, this list of conditions and the following
       disclaimer in the documentation and/or other materials provided
       with the distribution.

     * Neither the name of the author nor the names of its
       contributors may be used to endorse or promote products derived
       from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
   (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
   STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
   OF THE POSSIBILITY OF SUCH DAMAGE.
   ----------------------------------
*/
|
|
|
|
#include "Python.h"
|
|
#include "hashtable.h"
|
|
|
|
/* Smallest bucket count a table may have.  Bucket indices are computed as
   "hash & (num_buckets - 1)", so this must be a power of 2. */
#define HASHTABLE_MIN_SIZE 16
/* Load factor (entries / buckets) above which an insertion triggers a
   rehash. */
#define HASHTABLE_HIGH 0.50
/* Load factor below which a removal triggers a rehash. */
#define HASHTABLE_LOW 0.10
/* Multiplier applied to the entry count to choose the bucket count during a
   rehash.  The replacement list is fully parenthesized so the macro acts as
   a single operand wherever it is used (for example as a divisor). */
#define HASHTABLE_REHASH_FACTOR (2.0 / (HASHTABLE_LOW + HASHTABLE_HIGH))
|
|
|
|
/* First entry stored in the bucket list LIST (LIST must be an lvalue because
   its address is taken). */
#define BUCKETS_HEAD(LIST) \
        ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(LIST)))
/* First entry of bucket number INDEX in table TABLE. */
#define TABLE_HEAD(TABLE, INDEX) \
        ((_Py_hashtable_entry_t *)_Py_SLIST_HEAD(&(TABLE)->buckets[INDEX]))
/* Entry that follows E in its bucket chain. */
#define ENTRY_NEXT(E) \
        ((_Py_hashtable_entry_t *)_Py_SLIST_ITEM_NEXT(E))
/* Number of bytes allocated per entry of TABLE: the entry header followed by
   the table's fixed-size data area. */
#define HASHTABLE_ITEM_SIZE(TABLE) \
        (sizeof(_Py_hashtable_entry_t) + (TABLE)->data_size)
|
|
|
|
/* Forward declaration */
|
|
static void hashtable_rehash(_Py_hashtable_t *ht);
|
|
|
|
static void
|
|
_Py_slist_init(_Py_slist_t *list)
|
|
{
|
|
list->head = NULL;
|
|
}
|
|
|
|
static void
|
|
_Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item)
|
|
{
|
|
item->next = list->head;
|
|
list->head = item;
|
|
}
|
|
|
|
static void
|
|
_Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous,
|
|
_Py_slist_item_t *item)
|
|
{
|
|
if (previous != NULL)
|
|
previous->next = item->next;
|
|
else
|
|
list->head = item->next;
|
|
}
|
|
|
|
Py_uhash_t
|
|
_Py_hashtable_hash_int(const void *key)
|
|
{
|
|
return (Py_uhash_t)key;
|
|
}
|
|
|
|
Py_uhash_t
|
|
_Py_hashtable_hash_ptr(const void *key)
|
|
{
|
|
return (Py_uhash_t)_Py_HashPointer((void *)key);
|
|
}
|
|
|
|
int
|
|
_Py_hashtable_compare_direct(const void *key, const _Py_hashtable_entry_t *entry)
|
|
{
|
|
return entry->key == key;
|
|
}
|
|
|
|
/* makes sure the real size of the buckets array is a power of 2 */
|
|
static size_t
|
|
round_size(size_t s)
|
|
{
|
|
size_t i;
|
|
if (s < HASHTABLE_MIN_SIZE)
|
|
return HASHTABLE_MIN_SIZE;
|
|
i = 1;
|
|
while (i < s)
|
|
i <<= 1;
|
|
return i;
|
|
}
|
|
|
|
_Py_hashtable_t *
|
|
_Py_hashtable_new_full(size_t data_size, size_t init_size,
|
|
_Py_hashtable_hash_func hash_func,
|
|
_Py_hashtable_compare_func compare_func,
|
|
_Py_hashtable_copy_data_func copy_data_func,
|
|
_Py_hashtable_free_data_func free_data_func,
|
|
_Py_hashtable_get_data_size_func get_data_size_func,
|
|
_Py_hashtable_allocator_t *allocator)
|
|
{
|
|
_Py_hashtable_t *ht;
|
|
size_t buckets_size;
|
|
_Py_hashtable_allocator_t alloc;
|
|
|
|
if (allocator == NULL) {
|
|
alloc.malloc = PyMem_RawMalloc;
|
|
alloc.free = PyMem_RawFree;
|
|
}
|
|
else
|
|
alloc = *allocator;
|
|
|
|
ht = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t));
|
|
if (ht == NULL)
|
|
return ht;
|
|
|
|
ht->num_buckets = round_size(init_size);
|
|
ht->entries = 0;
|
|
ht->data_size = data_size;
|
|
|
|
buckets_size = ht->num_buckets * sizeof(ht->buckets[0]);
|
|
ht->buckets = alloc.malloc(buckets_size);
|
|
if (ht->buckets == NULL) {
|
|
alloc.free(ht);
|
|
return NULL;
|
|
}
|
|
memset(ht->buckets, 0, buckets_size);
|
|
|
|
ht->hash_func = hash_func;
|
|
ht->compare_func = compare_func;
|
|
ht->copy_data_func = copy_data_func;
|
|
ht->free_data_func = free_data_func;
|
|
ht->get_data_size_func = get_data_size_func;
|
|
ht->alloc = alloc;
|
|
return ht;
|
|
}
|
|
|
|
_Py_hashtable_t *
|
|
_Py_hashtable_new(size_t data_size,
|
|
_Py_hashtable_hash_func hash_func,
|
|
_Py_hashtable_compare_func compare_func)
|
|
{
|
|
return _Py_hashtable_new_full(data_size, HASHTABLE_MIN_SIZE,
|
|
hash_func, compare_func,
|
|
NULL, NULL, NULL, NULL);
|
|
}
|
|
|
|
size_t
|
|
_Py_hashtable_size(_Py_hashtable_t *ht)
|
|
{
|
|
size_t size;
|
|
size_t hv;
|
|
|
|
size = sizeof(_Py_hashtable_t);
|
|
|
|
/* buckets */
|
|
size += ht->num_buckets * sizeof(_Py_hashtable_entry_t *);
|
|
|
|
/* entries */
|
|
size += ht->entries * HASHTABLE_ITEM_SIZE(ht);
|
|
|
|
/* data linked from entries */
|
|
if (ht->get_data_size_func) {
|
|
for (hv = 0; hv < ht->num_buckets; hv++) {
|
|
_Py_hashtable_entry_t *entry;
|
|
|
|
for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) {
|
|
void *data;
|
|
|
|
data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry);
|
|
size += ht->get_data_size_func(data);
|
|
}
|
|
}
|
|
}
|
|
return size;
|
|
}
|
|
|
|
#ifdef Py_DEBUG
|
|
void
|
|
_Py_hashtable_print_stats(_Py_hashtable_t *ht)
|
|
{
|
|
size_t size;
|
|
size_t chain_len, max_chain_len, total_chain_len, nchains;
|
|
_Py_hashtable_entry_t *entry;
|
|
size_t hv;
|
|
double load;
|
|
|
|
size = _Py_hashtable_size(ht);
|
|
|
|
load = (double)ht->entries / ht->num_buckets;
|
|
|
|
max_chain_len = 0;
|
|
total_chain_len = 0;
|
|
nchains = 0;
|
|
for (hv = 0; hv < ht->num_buckets; hv++) {
|
|
entry = TABLE_HEAD(ht, hv);
|
|
if (entry != NULL) {
|
|
chain_len = 0;
|
|
for (; entry; entry = ENTRY_NEXT(entry)) {
|
|
chain_len++;
|
|
}
|
|
if (chain_len > max_chain_len)
|
|
max_chain_len = chain_len;
|
|
total_chain_len += chain_len;
|
|
nchains++;
|
|
}
|
|
}
|
|
printf("hash table %p: entries=%zu/%zu (%.0f%%), ",
|
|
ht, ht->entries, ht->num_buckets, load * 100.0);
|
|
if (nchains)
|
|
printf("avg_chain_len=%.1f, ", (double)total_chain_len / nchains);
|
|
printf("max_chain_len=%zu, %zu kB\n",
|
|
max_chain_len, size / 1024);
|
|
}
|
|
#endif
|
|
|
|
/* Get an entry. Return NULL if the key does not exist. */
|
|
_Py_hashtable_entry_t *
|
|
_Py_hashtable_get_entry(_Py_hashtable_t *ht, const void *key)
|
|
{
|
|
Py_uhash_t key_hash;
|
|
size_t index;
|
|
_Py_hashtable_entry_t *entry;
|
|
|
|
key_hash = ht->hash_func(key);
|
|
index = key_hash & (ht->num_buckets - 1);
|
|
|
|
for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) {
|
|
if (entry->key_hash == key_hash && ht->compare_func(key, entry))
|
|
break;
|
|
}
|
|
|
|
return entry;
|
|
}
|
|
|
|
static int
|
|
_hashtable_pop_entry(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size)
|
|
{
|
|
Py_uhash_t key_hash;
|
|
size_t index;
|
|
_Py_hashtable_entry_t *entry, *previous;
|
|
|
|
key_hash = ht->hash_func(key);
|
|
index = key_hash & (ht->num_buckets - 1);
|
|
|
|
previous = NULL;
|
|
for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) {
|
|
if (entry->key_hash == key_hash && ht->compare_func(key, entry))
|
|
break;
|
|
previous = entry;
|
|
}
|
|
|
|
if (entry == NULL)
|
|
return 0;
|
|
|
|
_Py_slist_remove(&ht->buckets[index], (_Py_slist_item_t *)previous,
|
|
(_Py_slist_item_t *)entry);
|
|
ht->entries--;
|
|
|
|
if (data != NULL)
|
|
_Py_HASHTABLE_ENTRY_READ_DATA(ht, data, data_size, entry);
|
|
ht->alloc.free(entry);
|
|
|
|
if ((float)ht->entries / (float)ht->num_buckets < HASHTABLE_LOW)
|
|
hashtable_rehash(ht);
|
|
return 1;
|
|
}
|
|
|
|
/* Add a new entry to the hash. The key must not be present in the hash table.
|
|
Return 0 on success, -1 on memory error. */
|
|
int
|
|
_Py_hashtable_set(_Py_hashtable_t *ht, const void *key,
|
|
void *data, size_t data_size)
|
|
{
|
|
Py_uhash_t key_hash;
|
|
size_t index;
|
|
_Py_hashtable_entry_t *entry;
|
|
|
|
assert(data != NULL || data_size == 0);
|
|
#ifndef NDEBUG
|
|
/* Don't write the assertion on a single line because it is interesting
|
|
to know the duplicated entry if the assertion failed. The entry can
|
|
be read using a debugger. */
|
|
entry = _Py_hashtable_get_entry(ht, key);
|
|
assert(entry == NULL);
|
|
#endif
|
|
|
|
key_hash = ht->hash_func(key);
|
|
index = key_hash & (ht->num_buckets - 1);
|
|
|
|
entry = ht->alloc.malloc(HASHTABLE_ITEM_SIZE(ht));
|
|
if (entry == NULL) {
|
|
/* memory allocation failed */
|
|
return -1;
|
|
}
|
|
|
|
entry->key = (void *)key;
|
|
entry->key_hash = key_hash;
|
|
|
|
assert(data_size == ht->data_size);
|
|
memcpy(_PY_HASHTABLE_ENTRY_DATA(entry), data, data_size);
|
|
|
|
_Py_slist_prepend(&ht->buckets[index], (_Py_slist_item_t*)entry);
|
|
ht->entries++;
|
|
|
|
if ((float)ht->entries / (float)ht->num_buckets > HASHTABLE_HIGH)
|
|
hashtable_rehash(ht);
|
|
return 0;
|
|
}
|
|
|
|
/* Get data from an entry. Copy entry data into data and return 1 if the entry
|
|
exists, return 0 if the entry does not exist. */
|
|
int
|
|
_Py_hashtable_get(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size)
|
|
{
|
|
_Py_hashtable_entry_t *entry;
|
|
|
|
assert(data != NULL);
|
|
|
|
entry = _Py_hashtable_get_entry(ht, key);
|
|
if (entry == NULL)
|
|
return 0;
|
|
_Py_HASHTABLE_ENTRY_READ_DATA(ht, data, data_size, entry);
|
|
return 1;
|
|
}
|
|
|
|
int
|
|
_Py_hashtable_pop(_Py_hashtable_t *ht, const void *key, void *data, size_t data_size)
|
|
{
|
|
assert(data != NULL);
|
|
assert(ht->free_data_func == NULL);
|
|
return _hashtable_pop_entry(ht, key, data, data_size);
|
|
}
|
|
|
|
/* Delete an entry. The entry must exist. */
|
|
void
|
|
_Py_hashtable_delete(_Py_hashtable_t *ht, const void *key)
|
|
{
|
|
#ifndef NDEBUG
|
|
int found = _hashtable_pop_entry(ht, key, NULL, 0);
|
|
assert(found);
|
|
#else
|
|
(void)_hashtable_pop_entry(ht, key, NULL, 0);
|
|
#endif
|
|
}
|
|
|
|
/* Prototype for a pointer to a function to be called foreach
|
|
key/value pair in the hash by hashtable_foreach(). Iteration
|
|
stops if a non-zero value is returned. */
|
|
int
|
|
_Py_hashtable_foreach(_Py_hashtable_t *ht,
|
|
int (*func) (_Py_hashtable_entry_t *entry, void *arg),
|
|
void *arg)
|
|
{
|
|
_Py_hashtable_entry_t *entry;
|
|
size_t hv;
|
|
|
|
for (hv = 0; hv < ht->num_buckets; hv++) {
|
|
for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) {
|
|
int res = func(entry, arg);
|
|
if (res)
|
|
return res;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static void
|
|
hashtable_rehash(_Py_hashtable_t *ht)
|
|
{
|
|
size_t buckets_size, new_size, bucket;
|
|
_Py_slist_t *old_buckets = NULL;
|
|
size_t old_num_buckets;
|
|
|
|
new_size = round_size((size_t)(ht->entries * HASHTABLE_REHASH_FACTOR));
|
|
if (new_size == ht->num_buckets)
|
|
return;
|
|
|
|
old_num_buckets = ht->num_buckets;
|
|
|
|
buckets_size = new_size * sizeof(ht->buckets[0]);
|
|
old_buckets = ht->buckets;
|
|
ht->buckets = ht->alloc.malloc(buckets_size);
|
|
if (ht->buckets == NULL) {
|
|
/* cancel rehash on memory allocation failure */
|
|
ht->buckets = old_buckets ;
|
|
/* memory allocation failed */
|
|
return;
|
|
}
|
|
memset(ht->buckets, 0, buckets_size);
|
|
|
|
ht->num_buckets = new_size;
|
|
|
|
for (bucket = 0; bucket < old_num_buckets; bucket++) {
|
|
_Py_hashtable_entry_t *entry, *next;
|
|
for (entry = BUCKETS_HEAD(old_buckets[bucket]); entry != NULL; entry = next) {
|
|
size_t entry_index;
|
|
|
|
assert(ht->hash_func(entry->key) == entry->key_hash);
|
|
next = ENTRY_NEXT(entry);
|
|
entry_index = entry->key_hash & (new_size - 1);
|
|
|
|
_Py_slist_prepend(&ht->buckets[entry_index], (_Py_slist_item_t*)entry);
|
|
}
|
|
}
|
|
|
|
ht->alloc.free(old_buckets);
|
|
}
|
|
|
|
void
|
|
_Py_hashtable_clear(_Py_hashtable_t *ht)
|
|
{
|
|
_Py_hashtable_entry_t *entry, *next;
|
|
size_t i;
|
|
|
|
for (i=0; i < ht->num_buckets; i++) {
|
|
for (entry = TABLE_HEAD(ht, i); entry != NULL; entry = next) {
|
|
next = ENTRY_NEXT(entry);
|
|
if (ht->free_data_func)
|
|
ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry));
|
|
ht->alloc.free(entry);
|
|
}
|
|
_Py_slist_init(&ht->buckets[i]);
|
|
}
|
|
ht->entries = 0;
|
|
hashtable_rehash(ht);
|
|
}
|
|
|
|
void
|
|
_Py_hashtable_destroy(_Py_hashtable_t *ht)
|
|
{
|
|
size_t i;
|
|
|
|
for (i = 0; i < ht->num_buckets; i++) {
|
|
_Py_slist_item_t *entry = ht->buckets[i].head;
|
|
while (entry) {
|
|
_Py_slist_item_t *entry_next = entry->next;
|
|
if (ht->free_data_func)
|
|
ht->free_data_func(_Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry));
|
|
ht->alloc.free(entry);
|
|
entry = entry_next;
|
|
}
|
|
}
|
|
|
|
ht->alloc.free(ht->buckets);
|
|
ht->alloc.free(ht);
|
|
}
|
|
|
|
/* Return a copy of the hash table */
|
|
_Py_hashtable_t *
|
|
_Py_hashtable_copy(_Py_hashtable_t *src)
|
|
{
|
|
_Py_hashtable_t *dst;
|
|
_Py_hashtable_entry_t *entry;
|
|
size_t bucket;
|
|
int err;
|
|
void *data, *new_data;
|
|
|
|
dst = _Py_hashtable_new_full(src->data_size, src->num_buckets,
|
|
src->hash_func, src->compare_func,
|
|
src->copy_data_func, src->free_data_func,
|
|
src->get_data_size_func, &src->alloc);
|
|
if (dst == NULL)
|
|
return NULL;
|
|
|
|
for (bucket=0; bucket < src->num_buckets; bucket++) {
|
|
entry = TABLE_HEAD(src, bucket);
|
|
for (; entry; entry = ENTRY_NEXT(entry)) {
|
|
if (src->copy_data_func) {
|
|
data = _Py_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry);
|
|
new_data = src->copy_data_func(data);
|
|
if (new_data != NULL)
|
|
err = _Py_hashtable_set(dst, entry->key,
|
|
&new_data, src->data_size);
|
|
else
|
|
err = 1;
|
|
}
|
|
else {
|
|
data = _PY_HASHTABLE_ENTRY_DATA(entry);
|
|
err = _Py_hashtable_set(dst, entry->key, data, src->data_size);
|
|
}
|
|
if (err) {
|
|
_Py_hashtable_destroy(dst);
|
|
return NULL;
|
|
}
|
|
}
|
|
}
|
|
return dst;
|
|
}
|
|
|