2020-06-08 11:30:33 -03:00
|
|
|
/* Bit and bytes utilities.

   Bytes swap functions, reverse order of bytes:

   - _Py_bswap16(uint16_t)
   - _Py_bswap32(uint32_t)
   - _Py_bswap64(uint64_t)
*/
|
|
|
|
|
2020-06-15 09:33:48 -03:00
|
|
|
#ifndef Py_INTERNAL_BITUTILS_H
|
|
|
|
#define Py_INTERNAL_BITUTILS_H
|
2020-04-17 12:47:20 -03:00
|
|
|
#ifdef __cplusplus
|
|
|
|
extern "C" {
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef Py_BUILD_CORE
|
|
|
|
# error "this header requires Py_BUILD_CORE define"
|
|
|
|
#endif
|
|
|
|
|
2020-11-13 10:38:17 -04:00
|
|
|
// Define _PY_HAVE_BUILTIN_BSWAP when compiling with GCC 4.8 or newer, the
// first release that provides all three byte-swap builtins used below.
// The && sub-expression is parenthesized explicitly so the intended
// precedence is visible (and -Wparentheses stays quiet).
#if defined(__GNUC__) \
      && ((__GNUC__ >= 5) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 8)))
   /* __builtin_bswap16() is available since GCC 4.8,
      __builtin_bswap32() is available since GCC 4.3,
      __builtin_bswap64() is available since GCC 4.3. */
#  define _PY_HAVE_BUILTIN_BSWAP
#endif
|
|
|
|
|
|
|
|
#ifdef _MSC_VER
|
2023-08-21 15:05:59 -03:00
|
|
|
# include <intrin.h> // _byteswap_uint64()
|
2020-04-17 12:47:20 -03:00
|
|
|
#endif
|
|
|
|
|
2023-08-21 15:05:59 -03:00
|
|
|
|
2020-04-17 12:47:20 -03:00
|
|
|
// Reverse the order of the two bytes of 'word': 0xAABB becomes 0xBBAA.
//
// Uses the GCC/clang __builtin_bswap16() builtin or the MSVC
// _byteswap_ushort() intrinsic when available; otherwise falls back to a
// shift-and-mask implementation.
static inline uint16_t
_Py_bswap16(uint16_t word)
{
#if defined(_PY_HAVE_BUILTIN_BSWAP) || _Py__has_builtin(__builtin_bswap16)
    return __builtin_bswap16(word);
#elif defined(_MSC_VER)
    // _byteswap_ushort() operates on 'unsigned short'.
    Py_BUILD_ASSERT(sizeof(word) == sizeof(unsigned short));
    return _byteswap_ushort(word);
#else
    // Portable implementation which doesn't rely on circular bit shift.
    // The operands are promoted to int, so cast the result back explicitly
    // instead of relying on the implicit narrowing at 'return'.
    return (uint16_t)( ((word & UINT16_C(0x00FF)) << 8)
                     | ((word & UINT16_C(0xFF00)) >> 8));
#endif
}
|
|
|
|
|
|
|
|
// Reverse the order of the four bytes of 'word':
// 0xAABBCCDD becomes 0xDDCCBBAA.
//
// Uses the GCC/clang __builtin_bswap32() builtin or the MSVC
// _byteswap_ulong() intrinsic when available; otherwise falls back to a
// shift-and-mask implementation.
static inline uint32_t
_Py_bswap32(uint32_t word)
{
#if defined(_PY_HAVE_BUILTIN_BSWAP) || _Py__has_builtin(__builtin_bswap32)
    return __builtin_bswap32(word);
#elif defined(_MSC_VER)
    // _byteswap_ulong() operates on 'unsigned long'.
    Py_BUILD_ASSERT(sizeof(word) == sizeof(unsigned long));
    return _byteswap_ulong(word);
#else
    // Portable implementation which doesn't rely on circular bit shift
    return ( ((word & UINT32_C(0x000000FF)) << 24)
           | ((word & UINT32_C(0x0000FF00)) <<  8)
           | ((word & UINT32_C(0x00FF0000)) >>  8)
           | ((word & UINT32_C(0xFF000000)) >> 24));
#endif
}
|
|
|
|
|
|
|
|
// Reverse the order of the eight bytes of 'word':
// 0x0123456789ABCDEF becomes 0xEFCDAB8967452301.
//
// Uses the GCC/clang __builtin_bswap64() builtin or the MSVC
// _byteswap_uint64() intrinsic when available; otherwise falls back to a
// shift-and-mask implementation.
static inline uint64_t
_Py_bswap64(uint64_t word)
{
#if defined(_PY_HAVE_BUILTIN_BSWAP) || _Py__has_builtin(__builtin_bswap64)
    return __builtin_bswap64(word);
#elif defined(_MSC_VER)
    return _byteswap_uint64(word);
#else
    // Portable implementation which doesn't rely on circular bit shift
    return ( ((word & UINT64_C(0x00000000000000FF)) << 56)
           | ((word & UINT64_C(0x000000000000FF00)) << 40)
           | ((word & UINT64_C(0x0000000000FF0000)) << 24)
           | ((word & UINT64_C(0x00000000FF000000)) <<  8)
           | ((word & UINT64_C(0x000000FF00000000)) >>  8)
           | ((word & UINT64_C(0x0000FF0000000000)) >> 24)
           | ((word & UINT64_C(0x00FF000000000000)) >> 40)
           | ((word & UINT64_C(0xFF00000000000000)) >> 56));
#endif
}
|
|
|
|
|
|
|
|
|
2020-06-08 11:30:33 -03:00
|
|
|
// Population count: count the number of 1's in 'x'
// (number of bits set to 1), also known as the hamming weight.
//
// Returns a value in the range [0, 32].
//
// Implementation note. CPUID is not used, to test if x86 POPCNT instruction
// can be used, to keep the implementation simple. For example, Visual Studio
// __popcnt() is not used for this reason. The clang and GCC builtin function
// can use the x86 POPCNT instruction if the target architecture has SSE4a or
// newer.
static inline int
_Py_popcount32(uint32_t x)
{
#if (defined(__clang__) || defined(__GNUC__))

#  if SIZEOF_INT >= 4
    // 'unsigned int' is wide enough to hold every uint32_t value.
    Py_BUILD_ASSERT(sizeof(x) <= sizeof(unsigned int));
    return __builtin_popcount(x);
#  else
    // The C standard guarantees that unsigned long will always be big enough
    // to hold a uint32_t value without losing information.
    Py_BUILD_ASSERT(sizeof(x) <= sizeof(unsigned long));
    return __builtin_popcountl(x);
#  endif

#else
    // 32-bit SWAR (SIMD Within A Register) popcount

    // Binary: 0 1 0 1 ...
    const uint32_t M1 = 0x55555555;
    // Binary: 00 11 00 11 ...
    const uint32_t M2 = 0x33333333;
    // Binary: 0000 1111 0000 1111 ...
    const uint32_t M4 = 0x0F0F0F0F;

    // Put count of each 2 bits into those 2 bits
    x = x - ((x >> 1) & M1);
    // Put count of each 4 bits into those 4 bits
    x = (x & M2) + ((x >> 2) & M2);
    // Put count of each 8 bits into those 8 bits
    x = (x + (x >> 4)) & M4;

    // Sum of the 4 byte counts.
    // Take care when considering changes to the next line. Portability and
    // correctness are delicate here, thanks to C's "integer promotions" (C99
    // §6.3.1.1p2). On machines where the `int` type has width greater than 32
    // bits, `x` will be promoted to an `int`, and following C's "usual
    // arithmetic conversions" (C99 §6.3.1.8), the multiplication will be
    // performed as a multiplication of two `unsigned int` operands. In this
    // case it's critical that we cast back to `uint32_t` in order to keep only
    // the least significant 32 bits. On machines where the `int` type has
    // width no greater than 32, the multiplication is of two 32-bit unsigned
    // integer types, and the (uint32_t) cast is a no-op. In both cases, we
    // avoid the risk of undefined behaviour due to overflow of a
    // multiplication of signed integer types.
    return (uint32_t)(x * 0x01010101U) >> 24;
#endif
}
|
|
|
|
|
|
|
|
|
2020-06-15 09:33:48 -03:00
|
|
|
// Return the index of the most significant 1 bit in 'x'. This is the smallest
// integer k such that x < 2**k. Equivalent to floor(log2(x)) + 1 for x != 0.
//
// Returns 0 when x == 0.
static inline int
_Py_bit_length(unsigned long x)
{
#if (defined(__clang__) || defined(__GNUC__))
    if (x != 0) {
        // __builtin_clzl() is available since GCC 3.4.
        // Undefined behavior for x == 0.
        return (int)sizeof(unsigned long) * 8 - __builtin_clzl(x);
    }
    else {
        return 0;
    }
#elif defined(_MSC_VER)
    // _BitScanReverse() is documented to search 32 bits.
    Py_BUILD_ASSERT(sizeof(unsigned long) <= 4);
    unsigned long msb;
    // _BitScanReverse() returns zero when no bit is set (x == 0);
    // otherwise 'msb' receives the zero-based index of the highest set bit.
    if (_BitScanReverse(&msb, x)) {
        return (int)msb + 1;
    }
    else {
        return 0;
    }
#else
    // Bit length of every value in [0, 31]. 'static' so that the table is
    // built once instead of being re-initialized on every call.
    static const int BIT_LENGTH_TABLE[32] = {
        0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
    };
    int msb = 0;
    // Drop 6 low bits at a time while x does not fit the table. Since
    // x >= 32 here, at least 6 bits are significant, so values 32..63 end
    // up as 6 + BIT_LENGTH_TABLE[0] == 6.
    while (x >= 32) {
        msb += 6;
        x >>= 6;
    }
    msb += BIT_LENGTH_TABLE[x];
    return msb;
#endif
}
|
|
|
|
|
|
|
|
|
2020-04-17 12:47:20 -03:00
|
|
|
#ifdef __cplusplus
|
|
|
|
}
|
|
|
|
#endif
|
2020-06-15 09:33:48 -03:00
|
|
|
#endif /* !Py_INTERNAL_BITUTILS_H */
|