bpo-40602: Optimize _Py_hashtable for pointer keys (GH-20051)

Optimize _Py_hashtable_get() and _Py_hashtable_get_entry() for
pointer keys:

* key_size == sizeof(void*)
* hash_func == _Py_hashtable_hash_ptr
* compare_func == _Py_hashtable_compare_direct

Changes:

* Add get_func and get_entry_func members to _Py_hashtable_t
* Convert _Py_hashtable_get() and _Py_hashtable_get_entry() functions
  to static nline functions.
* Add specialized get and get entry for pointer keys.
This commit is contained in:
Victor Stinner 2020-05-12 13:31:59 +02:00 committed by GitHub
parent 74ea6b5a75
commit 7c6e970775
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 154 additions and 95 deletions

View File

@ -76,12 +76,17 @@ typedef struct {
/* Forward declaration */
struct _Py_hashtable_t;
typedef struct _Py_hashtable_t _Py_hashtable_t;
typedef Py_uhash_t (*_Py_hashtable_hash_func) (struct _Py_hashtable_t *ht,
typedef Py_uhash_t (*_Py_hashtable_hash_func) (_Py_hashtable_t *ht,
const void *pkey);
typedef int (*_Py_hashtable_compare_func) (struct _Py_hashtable_t *ht,
typedef int (*_Py_hashtable_compare_func) (_Py_hashtable_t *ht,
const void *pkey,
const _Py_hashtable_entry_t *he);
typedef _Py_hashtable_entry_t* (*_Py_hashtable_get_entry_func)(_Py_hashtable_t *ht,
const void *pkey);
typedef int (*_Py_hashtable_get_func) (_Py_hashtable_t *ht,
const void *pkey, void *data);
typedef struct {
/* allocate a memory block */
@ -93,18 +98,19 @@ typedef struct {
/* _Py_hashtable: table */
typedef struct _Py_hashtable_t {
struct _Py_hashtable_t {
size_t num_buckets;
size_t entries; /* Total number of entries in the table. */
_Py_slist_t *buckets;
size_t key_size;
size_t data_size;
_Py_hashtable_get_func get_func;
_Py_hashtable_get_entry_func get_entry_func;
_Py_hashtable_hash_func hash_func;
_Py_hashtable_compare_func compare_func;
_Py_hashtable_allocator_t alloc;
} _Py_hashtable_t;
};
/* hash a pointer (void*) */
PyAPI_FUNC(Py_uhash_t) _Py_hashtable_hash_ptr(
@ -176,10 +182,12 @@ PyAPI_FUNC(int) _Py_hashtable_set(
Don't call directly this function, but use _Py_HASHTABLE_GET_ENTRY()
macro */
PyAPI_FUNC(_Py_hashtable_entry_t*) _Py_hashtable_get_entry(
_Py_hashtable_t *ht,
size_t key_size,
const void *pkey);
static inline _Py_hashtable_entry_t *
_Py_hashtable_get_entry(_Py_hashtable_t *ht, size_t key_size, const void *pkey)
{
assert(key_size == ht->key_size);
return ht->get_entry_func(ht, pkey);
}
#define _Py_HASHTABLE_GET_ENTRY(TABLE, KEY) \
_Py_hashtable_get_entry(TABLE, sizeof(KEY), &(KEY))
@ -189,12 +197,14 @@ PyAPI_FUNC(_Py_hashtable_entry_t*) _Py_hashtable_get_entry(
exists, return 0 if the entry does not exist.
Don't call directly this function, but use _Py_HASHTABLE_GET() macro */
PyAPI_FUNC(int) _Py_hashtable_get(
_Py_hashtable_t *ht,
size_t key_size,
const void *pkey,
size_t data_size,
void *data);
static inline int
_Py_hashtable_get(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
size_t data_size, void *data)
{
assert(key_size == ht->key_size);
assert(data_size == ht->data_size);
return ht->get_func(ht, pkey, data);
}
#define _Py_HASHTABLE_GET(TABLE, KEY, DATA) \
_Py_hashtable_get(TABLE, sizeof(KEY), &(KEY), sizeof(DATA), &(DATA))

View File

@ -108,7 +108,6 @@ Py_uhash_t
_Py_hashtable_hash_ptr(struct _Py_hashtable_t *ht, const void *pkey)
{
void *key;
_Py_HASHTABLE_READ_KEY(ht, pkey, key);
return (Py_uhash_t)_Py_HashPointer(key);
}
@ -137,61 +136,6 @@ round_size(size_t s)
}
_Py_hashtable_t *
_Py_hashtable_new_full(size_t key_size, size_t data_size,
size_t init_size,
_Py_hashtable_hash_func hash_func,
_Py_hashtable_compare_func compare_func,
_Py_hashtable_allocator_t *allocator)
{
_Py_hashtable_t *ht;
size_t buckets_size;
_Py_hashtable_allocator_t alloc;
if (allocator == NULL) {
alloc.malloc = PyMem_Malloc;
alloc.free = PyMem_Free;
}
else {
alloc = *allocator;
}
ht = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t));
if (ht == NULL)
return ht;
ht->num_buckets = round_size(init_size);
ht->entries = 0;
ht->key_size = key_size;
ht->data_size = data_size;
buckets_size = ht->num_buckets * sizeof(ht->buckets[0]);
ht->buckets = alloc.malloc(buckets_size);
if (ht->buckets == NULL) {
alloc.free(ht);
return NULL;
}
memset(ht->buckets, 0, buckets_size);
ht->hash_func = hash_func;
ht->compare_func = compare_func;
ht->alloc = alloc;
return ht;
}
_Py_hashtable_t *
_Py_hashtable_new(size_t key_size, size_t data_size,
_Py_hashtable_hash_func hash_func,
_Py_hashtable_compare_func compare_func)
{
return _Py_hashtable_new_full(key_size, data_size,
HASHTABLE_MIN_SIZE,
hash_func, compare_func,
NULL);
}
size_t
_Py_hashtable_size(_Py_hashtable_t *ht)
{
@ -251,23 +195,20 @@ _Py_hashtable_print_stats(_Py_hashtable_t *ht)
_Py_hashtable_entry_t *
_Py_hashtable_get_entry(_Py_hashtable_t *ht,
size_t key_size, const void *pkey)
_Py_hashtable_get_entry_generic(_Py_hashtable_t *ht, const void *pkey)
{
Py_uhash_t key_hash;
size_t index;
_Py_hashtable_entry_t *entry;
assert(key_size == ht->key_size);
key_hash = ht->hash_func(ht, pkey);
index = key_hash & (ht->num_buckets - 1);
for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) {
if (entry->key_hash == key_hash && ht->compare_func(ht, pkey, entry))
Py_uhash_t key_hash = ht->hash_func(ht, pkey);
size_t index = key_hash & (ht->num_buckets - 1);
_Py_hashtable_entry_t *entry = entry = TABLE_HEAD(ht, index);
while (1) {
if (entry == NULL) {
return NULL;
}
if (entry->key_hash == key_hash && ht->compare_func(ht, pkey, entry)) {
break;
}
entry = ENTRY_NEXT(entry);
}
return entry;
}
@ -324,7 +265,7 @@ _Py_hashtable_set(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
/* Don't write the assertion on a single line because it is interesting
to know the duplicated entry if the assertion failed. The entry can
be read using a debugger. */
entry = _Py_hashtable_get_entry(ht, key_size, pkey);
entry = ht->get_entry_func(ht, pkey);
assert(entry == NULL);
#endif
@ -352,18 +293,62 @@ _Py_hashtable_set(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
int
_Py_hashtable_get(_Py_hashtable_t *ht, size_t key_size,const void *pkey,
size_t data_size, void *data)
_Py_hashtable_get_generic(_Py_hashtable_t *ht, const void *pkey, void *data)
{
_Py_hashtable_entry_t *entry;
assert(data != NULL);
entry = _Py_hashtable_get_entry(ht, key_size, pkey);
if (entry == NULL)
_Py_hashtable_entry_t *entry = ht->get_entry_func(ht, pkey);
if (entry != NULL) {
ENTRY_READ_PDATA(ht, entry, ht->data_size, data);
return 1;
}
else {
return 0;
ENTRY_READ_PDATA(ht, entry, data_size, data);
return 1;
}
}
// Specialized for:
// key_size == sizeof(void*)
// hash_func == _Py_hashtable_hash_ptr
// compare_func == _Py_hashtable_compare_direct
_Py_hashtable_entry_t *
_Py_hashtable_get_entry_ptr(_Py_hashtable_t *ht, const void *pkey)
{
Py_uhash_t key_hash = _Py_hashtable_hash_ptr(ht, pkey);
size_t index = key_hash & (ht->num_buckets - 1);
_Py_hashtable_entry_t *entry = entry = TABLE_HEAD(ht, index);
while (1) {
if (entry == NULL) {
return NULL;
}
if (entry->key_hash == key_hash) {
const void *pkey2 = _Py_HASHTABLE_ENTRY_PKEY(entry);
if (memcmp(pkey, pkey2, sizeof(void*)) == 0) {
break;
}
}
entry = ENTRY_NEXT(entry);
}
return entry;
}
// Specialized for:
// key_size == sizeof(void*)
// hash_func == _Py_hashtable_hash_ptr
// compare_func == _Py_hashtable_compare_direct
int
_Py_hashtable_get_ptr(_Py_hashtable_t *ht, const void *pkey, void *data)
{
assert(data != NULL);
_Py_hashtable_entry_t *entry = _Py_hashtable_get_entry_ptr(ht, pkey);
if (entry != NULL) {
ENTRY_READ_PDATA(ht, entry, ht->data_size, data);
return 1;
}
else {
return 0;
}
}
@ -454,6 +439,70 @@ hashtable_rehash(_Py_hashtable_t *ht)
}
_Py_hashtable_t *
_Py_hashtable_new_full(size_t key_size, size_t data_size,
size_t init_size,
_Py_hashtable_hash_func hash_func,
_Py_hashtable_compare_func compare_func,
_Py_hashtable_allocator_t *allocator)
{
_Py_hashtable_t *ht;
size_t buckets_size;
_Py_hashtable_allocator_t alloc;
if (allocator == NULL) {
alloc.malloc = PyMem_Malloc;
alloc.free = PyMem_Free;
}
else {
alloc = *allocator;
}
ht = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t));
if (ht == NULL)
return ht;
ht->num_buckets = round_size(init_size);
ht->entries = 0;
ht->key_size = key_size;
ht->data_size = data_size;
buckets_size = ht->num_buckets * sizeof(ht->buckets[0]);
ht->buckets = alloc.malloc(buckets_size);
if (ht->buckets == NULL) {
alloc.free(ht);
return NULL;
}
memset(ht->buckets, 0, buckets_size);
ht->get_func = _Py_hashtable_get_generic;
ht->get_entry_func = _Py_hashtable_get_entry_generic;
ht->hash_func = hash_func;
ht->compare_func = compare_func;
ht->alloc = alloc;
if (ht->key_size == sizeof(void*)
&& ht->hash_func == _Py_hashtable_hash_ptr
&& ht->compare_func == _Py_hashtable_compare_direct)
{
ht->get_func = _Py_hashtable_get_ptr;
ht->get_entry_func = _Py_hashtable_get_entry_ptr;
}
return ht;
}
_Py_hashtable_t *
_Py_hashtable_new(size_t key_size, size_t data_size,
_Py_hashtable_hash_func hash_func,
_Py_hashtable_compare_func compare_func)
{
return _Py_hashtable_new_full(key_size, data_size,
HASHTABLE_MIN_SIZE,
hash_func, compare_func,
NULL);
}
void
_Py_hashtable_clear(_Py_hashtable_t *ht)
{