bpo-29782: Consolidate _Py_Bit_Length() (GH-20739)
In GH-2866, _Py_Bit_Length() was added to pymath.h for lack of a better location. GH-20518 added a more appropriate header file for bit utilities. It also shows how to properly use intrinsics. This allows reconsidering bpo-29782. * Move the function to the new header. * Changed return type to match __builtin_clzl() and reviewed usage. * Use intrinsics where available. * Pick a fallback implementation suitable for inlining.
This commit is contained in:
parent
25f38d7044
commit
794e7d1ab2
|
@ -7,8 +7,8 @@
|
||||||
- _Py_bswap64(uint64_t)
|
- _Py_bswap64(uint64_t)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef Py_INTERNAL_BSWAP_H
|
#ifndef Py_INTERNAL_BITUTILS_H
|
||||||
#define Py_INTERNAL_BSWAP_H
|
#define Py_INTERNAL_BITUTILS_H
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
@ -131,8 +131,47 @@ _Py_popcount32(uint32_t x)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Return the index of the most significant 1 bit in 'x'. This is the smallest
|
||||||
|
// integer k such that x < 2**k. Equivalent to floor(log2(x)) + 1 for x != 0.
|
||||||
|
static inline int
|
||||||
|
_Py_bit_length(unsigned long x)
|
||||||
|
{
|
||||||
|
#if (defined(__clang__) || defined(__GNUC__))
|
||||||
|
if (x != 0) {
|
||||||
|
// __builtin_clzl() is available since GCC 3.4.
|
||||||
|
// Undefined behavior for x == 0.
|
||||||
|
return (int)sizeof(unsigned long) * 8 - __builtin_clzl(x);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
#elif defined(_MSC_VER)
|
||||||
|
// _BitScanReverse() is documented to search 32 bits.
|
||||||
|
Py_BUILD_ASSERT(sizeof(unsigned long) <= 4);
|
||||||
|
unsigned long msb;
|
||||||
|
if (_BitScanReverse(&msb, x)) {
|
||||||
|
return (int)msb + 1;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
const int BIT_LENGTH_TABLE[32] = {
|
||||||
|
0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||||
|
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
|
||||||
|
};
|
||||||
|
int msb = 0;
|
||||||
|
while (x >= 32) {
|
||||||
|
msb += 6;
|
||||||
|
x >>= 6;
|
||||||
|
}
|
||||||
|
msb += BIT_LENGTH_TABLE[x];
|
||||||
|
return msb;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#endif /* !Py_INTERNAL_BSWAP_H */
|
#endif /* !Py_INTERNAL_BITUTILS_H */
|
||||||
|
|
||||||
|
|
|
@ -227,12 +227,4 @@ PyAPI_FUNC(void) _Py_set_387controlword(unsigned short);
|
||||||
* behavior. */
|
* behavior. */
|
||||||
#define _Py_InIntegralTypeRange(type, v) (_Py_IntegralTypeMin(type) <= v && v <= _Py_IntegralTypeMax(type))
|
#define _Py_InIntegralTypeRange(type, v) (_Py_IntegralTypeMin(type) <= v && v <= _Py_IntegralTypeMax(type))
|
||||||
|
|
||||||
/* Return the smallest integer k such that n < 2**k, or 0 if n == 0.
|
|
||||||
* Equivalent to floor(log2(x))+1. Also equivalent to: bitwidth_of_type -
|
|
||||||
* count_leading_zero_bits(x)
|
|
||||||
*/
|
|
||||||
#ifndef Py_LIMITED_API
|
|
||||||
PyAPI_FUNC(unsigned int) _Py_bit_length(unsigned long d);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /* Py_PYMATH_H */
|
#endif /* Py_PYMATH_H */
|
||||||
|
|
|
@ -102,6 +102,45 @@ test_popcount(PyObject *self, PyObject *Py_UNUSED(args))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static int
|
||||||
|
check_bit_length(unsigned long x, int expected)
|
||||||
|
{
|
||||||
|
// Use volatile to prevent the compiler to optimize out the whole test
|
||||||
|
volatile unsigned long u = x;
|
||||||
|
int len = _Py_bit_length(u);
|
||||||
|
if (len != expected) {
|
||||||
|
PyErr_Format(PyExc_AssertionError,
|
||||||
|
"_Py_bit_length(%lu) returns %i, expected %i",
|
||||||
|
x, len, expected);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static PyObject*
|
||||||
|
test_bit_length(PyObject *self, PyObject *Py_UNUSED(args))
|
||||||
|
{
|
||||||
|
#define CHECK(X, RESULT) \
|
||||||
|
do { \
|
||||||
|
if (check_bit_length(X, RESULT) < 0) { \
|
||||||
|
return NULL; \
|
||||||
|
} \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
CHECK(0, 0);
|
||||||
|
CHECK(1, 1);
|
||||||
|
CHECK(0x1000, 13);
|
||||||
|
CHECK(0x1234, 13);
|
||||||
|
CHECK(0x54321, 19);
|
||||||
|
CHECK(0x7FFFFFFF, 31);
|
||||||
|
CHECK(0xFFFFFFFF, 32);
|
||||||
|
Py_RETURN_NONE;
|
||||||
|
|
||||||
|
#undef CHECK
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
#define TO_PTR(ch) ((void*)(uintptr_t)ch)
|
#define TO_PTR(ch) ((void*)(uintptr_t)ch)
|
||||||
#define FROM_PTR(ptr) ((uintptr_t)ptr)
|
#define FROM_PTR(ptr) ((uintptr_t)ptr)
|
||||||
#define VALUE(key) (1 + ((int)(key) - 'a'))
|
#define VALUE(key) (1 + ((int)(key) - 'a'))
|
||||||
|
@ -197,6 +236,7 @@ static PyMethodDef TestMethods[] = {
|
||||||
{"get_recursion_depth", get_recursion_depth, METH_NOARGS},
|
{"get_recursion_depth", get_recursion_depth, METH_NOARGS},
|
||||||
{"test_bswap", test_bswap, METH_NOARGS},
|
{"test_bswap", test_bswap, METH_NOARGS},
|
||||||
{"test_popcount", test_popcount, METH_NOARGS},
|
{"test_popcount", test_popcount, METH_NOARGS},
|
||||||
|
{"test_bit_length", test_bit_length, METH_NOARGS},
|
||||||
{"test_hashtable", test_hashtable, METH_NOARGS},
|
{"test_hashtable", test_hashtable, METH_NOARGS},
|
||||||
{NULL, NULL} /* sentinel */
|
{NULL, NULL} /* sentinel */
|
||||||
};
|
};
|
||||||
|
|
|
@ -53,6 +53,7 @@ raised for division by zero and mod by zero.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "Python.h"
|
#include "Python.h"
|
||||||
|
#include "pycore_bitutils.h" // _Py_bit_length()
|
||||||
#include "pycore_dtoa.h"
|
#include "pycore_dtoa.h"
|
||||||
#include "_math.h"
|
#include "_math.h"
|
||||||
|
|
||||||
|
|
|
@ -695,6 +695,13 @@ _PyLong_Sign(PyObject *vv)
|
||||||
return Py_SIZE(v) == 0 ? 0 : (Py_SIZE(v) < 0 ? -1 : 1);
|
return Py_SIZE(v) == 0 ? 0 : (Py_SIZE(v) < 0 ? -1 : 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
bit_length_digit(digit x)
|
||||||
|
{
|
||||||
|
Py_BUILD_ASSERT(PyLong_SHIFT <= sizeof(unsigned long) * 8);
|
||||||
|
return _Py_bit_length((unsigned long)x);
|
||||||
|
}
|
||||||
|
|
||||||
size_t
|
size_t
|
||||||
_PyLong_NumBits(PyObject *vv)
|
_PyLong_NumBits(PyObject *vv)
|
||||||
{
|
{
|
||||||
|
@ -712,7 +719,7 @@ _PyLong_NumBits(PyObject *vv)
|
||||||
if ((size_t)(ndigits - 1) > SIZE_MAX / (size_t)PyLong_SHIFT)
|
if ((size_t)(ndigits - 1) > SIZE_MAX / (size_t)PyLong_SHIFT)
|
||||||
goto Overflow;
|
goto Overflow;
|
||||||
result = (size_t)(ndigits - 1) * (size_t)PyLong_SHIFT;
|
result = (size_t)(ndigits - 1) * (size_t)PyLong_SHIFT;
|
||||||
msd_bits = _Py_bit_length(msd);
|
msd_bits = bit_length_digit(msd);
|
||||||
if (SIZE_MAX - msd_bits < result)
|
if (SIZE_MAX - msd_bits < result)
|
||||||
goto Overflow;
|
goto Overflow;
|
||||||
result += msd_bits;
|
result += msd_bits;
|
||||||
|
@ -1822,7 +1829,7 @@ long_format_binary(PyObject *aa, int base, int alternate,
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
size_a_in_bits = (size_a - 1) * PyLong_SHIFT +
|
size_a_in_bits = (size_a - 1) * PyLong_SHIFT +
|
||||||
_Py_bit_length(a->ob_digit[size_a - 1]);
|
bit_length_digit(a->ob_digit[size_a - 1]);
|
||||||
/* Allow 1 character for a '-' sign. */
|
/* Allow 1 character for a '-' sign. */
|
||||||
sz = negative + (size_a_in_bits + (bits - 1)) / bits;
|
sz = negative + (size_a_in_bits + (bits - 1)) / bits;
|
||||||
}
|
}
|
||||||
|
@ -2642,7 +2649,7 @@ x_divrem(PyLongObject *v1, PyLongObject *w1, PyLongObject **prem)
|
||||||
|
|
||||||
/* normalize: shift w1 left so that its top digit is >= PyLong_BASE/2.
|
/* normalize: shift w1 left so that its top digit is >= PyLong_BASE/2.
|
||||||
shift v1 left by the same amount. Results go into w and v. */
|
shift v1 left by the same amount. Results go into w and v. */
|
||||||
d = PyLong_SHIFT - _Py_bit_length(w1->ob_digit[size_w-1]);
|
d = PyLong_SHIFT - bit_length_digit(w1->ob_digit[size_w-1]);
|
||||||
carry = v_lshift(w->ob_digit, w1->ob_digit, size_w, d);
|
carry = v_lshift(w->ob_digit, w1->ob_digit, size_w, d);
|
||||||
assert(carry == 0);
|
assert(carry == 0);
|
||||||
carry = v_lshift(v->ob_digit, v1->ob_digit, size_v, d);
|
carry = v_lshift(v->ob_digit, v1->ob_digit, size_v, d);
|
||||||
|
@ -2764,7 +2771,7 @@ _PyLong_Frexp(PyLongObject *a, Py_ssize_t *e)
|
||||||
*e = 0;
|
*e = 0;
|
||||||
return 0.0;
|
return 0.0;
|
||||||
}
|
}
|
||||||
a_bits = _Py_bit_length(a->ob_digit[a_size-1]);
|
a_bits = bit_length_digit(a->ob_digit[a_size-1]);
|
||||||
/* The following is an overflow-free version of the check
|
/* The following is an overflow-free version of the check
|
||||||
"if ((a_size - 1) * PyLong_SHIFT + a_bits > PY_SSIZE_T_MAX) ..." */
|
"if ((a_size - 1) * PyLong_SHIFT + a_bits > PY_SSIZE_T_MAX) ..." */
|
||||||
if (a_size >= (PY_SSIZE_T_MAX - 1) / PyLong_SHIFT + 1 &&
|
if (a_size >= (PY_SSIZE_T_MAX - 1) / PyLong_SHIFT + 1 &&
|
||||||
|
@ -3857,8 +3864,8 @@ long_true_divide(PyObject *v, PyObject *w)
|
||||||
/* Extreme underflow */
|
/* Extreme underflow */
|
||||||
goto underflow_or_zero;
|
goto underflow_or_zero;
|
||||||
/* Next line is now safe from overflowing a Py_ssize_t */
|
/* Next line is now safe from overflowing a Py_ssize_t */
|
||||||
diff = diff * PyLong_SHIFT + _Py_bit_length(a->ob_digit[a_size - 1]) -
|
diff = diff * PyLong_SHIFT + bit_length_digit(a->ob_digit[a_size - 1]) -
|
||||||
_Py_bit_length(b->ob_digit[b_size - 1]);
|
bit_length_digit(b->ob_digit[b_size - 1]);
|
||||||
/* Now diff = a_bits - b_bits. */
|
/* Now diff = a_bits - b_bits. */
|
||||||
if (diff > DBL_MAX_EXP)
|
if (diff > DBL_MAX_EXP)
|
||||||
goto overflow;
|
goto overflow;
|
||||||
|
@ -3934,7 +3941,7 @@ long_true_divide(PyObject *v, PyObject *w)
|
||||||
}
|
}
|
||||||
x_size = Py_ABS(Py_SIZE(x));
|
x_size = Py_ABS(Py_SIZE(x));
|
||||||
assert(x_size > 0); /* result of division is never zero */
|
assert(x_size > 0); /* result of division is never zero */
|
||||||
x_bits = (x_size-1)*PyLong_SHIFT+_Py_bit_length(x->ob_digit[x_size-1]);
|
x_bits = (x_size-1)*PyLong_SHIFT+bit_length_digit(x->ob_digit[x_size-1]);
|
||||||
|
|
||||||
/* The number of extra bits that have to be rounded away. */
|
/* The number of extra bits that have to be rounded away. */
|
||||||
extra_bits = Py_MAX(x_bits, DBL_MIN_EXP - shift) - DBL_MANT_DIG;
|
extra_bits = Py_MAX(x_bits, DBL_MIN_EXP - shift) - DBL_MANT_DIG;
|
||||||
|
@ -4748,7 +4755,7 @@ _PyLong_GCD(PyObject *aarg, PyObject *barg)
|
||||||
alloc_b = Py_SIZE(b);
|
alloc_b = Py_SIZE(b);
|
||||||
/* reduce until a fits into 2 digits */
|
/* reduce until a fits into 2 digits */
|
||||||
while ((size_a = Py_SIZE(a)) > 2) {
|
while ((size_a = Py_SIZE(a)) > 2) {
|
||||||
nbits = _Py_bit_length(a->ob_digit[size_a-1]);
|
nbits = bit_length_digit(a->ob_digit[size_a-1]);
|
||||||
/* extract top 2*PyLong_SHIFT bits of a into x, along with
|
/* extract top 2*PyLong_SHIFT bits of a into x, along with
|
||||||
corresponding bits of b into y */
|
corresponding bits of b into y */
|
||||||
size_b = Py_SIZE(b);
|
size_b = Py_SIZE(b);
|
||||||
|
@ -5269,7 +5276,7 @@ int_bit_length_impl(PyObject *self)
|
||||||
return PyLong_FromLong(0);
|
return PyLong_FromLong(0);
|
||||||
|
|
||||||
msd = ((PyLongObject *)self)->ob_digit[ndigits-1];
|
msd = ((PyLongObject *)self)->ob_digit[ndigits-1];
|
||||||
msd_bits = _Py_bit_length(msd);
|
msd_bits = bit_length_digit(msd);
|
||||||
|
|
||||||
if (ndigits <= PY_SSIZE_T_MAX/PyLong_SHIFT)
|
if (ndigits <= PY_SSIZE_T_MAX/PyLong_SHIFT)
|
||||||
return PyLong_FromSsize_t((ndigits-1)*PyLong_SHIFT + msd_bits);
|
return PyLong_FromSsize_t((ndigits-1)*PyLong_SHIFT + msd_bits);
|
||||||
|
|
|
@ -79,18 +79,3 @@ round(double x)
|
||||||
return copysign(y, x);
|
return copysign(y, x);
|
||||||
}
|
}
|
||||||
#endif /* HAVE_ROUND */
|
#endif /* HAVE_ROUND */
|
||||||
|
|
||||||
static const unsigned int BitLengthTable[32] = {
|
|
||||||
0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
|
|
||||||
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5
|
|
||||||
};
|
|
||||||
|
|
||||||
unsigned int _Py_bit_length(unsigned long d) {
|
|
||||||
unsigned int d_bits = 0;
|
|
||||||
while (d >= 32) {
|
|
||||||
d_bits += 6;
|
|
||||||
d >>= 6;
|
|
||||||
}
|
|
||||||
d_bits += BitLengthTable[d];
|
|
||||||
return d_bits;
|
|
||||||
}
|
|
||||||
|
|
Loading…
Reference in New Issue