From b7a80d543e1e94475ab9c8214f7a9eab4e63c9ab Mon Sep 17 00:00:00 2001
From: Yury Selivanov
Date: Tue, 23 Jan 2018 22:17:04 -0500
Subject: [PATCH] bpo-32436: Don't use native popcount() (also fixes bpo-32641) (#5292)

---
 Python/context.c |  2 +-
 Python/hamt.c    | 26 ++++++++++++--------------
 2 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/Python/context.c b/Python/context.c
index 2f1d0f5c342..5439531cd1e 100644
--- a/Python/context.c
+++ b/Python/context.c
@@ -1171,7 +1171,7 @@ get_token_missing(void)
 int
 PyContext_ClearFreeList(void)
 {
-    int size = ctx_freelist_len;
+    Py_ssize_t size = ctx_freelist_len;
     while (ctx_freelist_len) {
         PyContext *ctx = ctx_freelist;
         ctx_freelist = (PyContext *)ctx->ctx_weakreflist;
diff --git a/Python/hamt.c b/Python/hamt.c
index af3dfce43c8..81629e9bf1c 100644
--- a/Python/hamt.c
+++ b/Python/hamt.c
@@ -4,11 +4,6 @@
 #include "internal/pystate.h"
 #include "internal/hamt.h"
 
-/* popcnt support in Visual Studio */
-#ifdef _MSC_VER
-#include <intrin.h>
-#endif
-
 /*
 This file provides an implemention of an immutable mapping using the
 Hash Array Mapped Trie (or HAMT) datastructure.
@@ -440,18 +435,21 @@ hamt_bitpos(int32_t hash, uint32_t shift)
 static inline uint32_t
 hamt_bitcount(uint32_t i)
 {
-#if defined(__GNUC__) && (__GNUC__ > 4)
-    return (uint32_t)__builtin_popcountl(i);
-#elif defined(__clang__) && (__clang_major__ > 3)
-    return (uint32_t)__builtin_popcountl(i);
-#elif defined(_MSC_VER)
-    return (uint32_t)__popcnt(i);
-#else
-    /* https://graphics.stanford.edu/~seander/bithacks.html */
+    /* We could use native popcount instruction but that would
+       require to either add configure flags to enable SSE4.2
+       support or to detect it dynamically. Otherwise, we have
+       a risk of CPython not working properly on older hardware.
+
+       In practice, there's no observable difference in
+       performance between using a popcount instruction or the
+       following fallback code.
+
+       The algorithm is copied from:
+       https://graphics.stanford.edu/~seander/bithacks.html
+    */
     i = i - ((i >> 1) & 0x55555555);
     i = (i & 0x33333333) + ((i >> 2) & 0x33333333);
     return ((i + (i >> 4) & 0xF0F0F0F) * 0x1010101) >> 24;
-#endif
 }
 
 static inline uint32_t