From 794e7d1ab2d7afe70fe0dd87ca8174ac860413e4 Mon Sep 17 00:00:00 2001 From: Niklas Fiekas Date: Mon, 15 Jun 2020 14:33:48 +0200 Subject: [PATCH] bpo-29782: Consolidate _Py_Bit_Length() (GH-20739) In GH-2866, _Py_Bit_Length() was added to pymath.h for lack of a better location. GH-20518 added a more appropriate header file for bit utilities. It also shows how to properly use intrinsics. This allows reconsidering bpo-29782. * Move the function to the new header. * Changed return type to match __builtin_clzl() and reviewed usage. * Use intrinsics where available. * Pick a fallback implementation suitable for inlining. --- Include/internal/pycore_bitutils.h | 47 +++++++++++++++++++++++++++--- Include/pymath.h | 8 ----- Modules/_testinternalcapi.c | 40 +++++++++++++++++++++++++ Modules/mathmodule.c | 1 + Objects/longobject.c | 25 ++++++++++------ Python/pymath.c | 15 ---------- 6 files changed, 100 insertions(+), 36 deletions(-) diff --git a/Include/internal/pycore_bitutils.h b/Include/internal/pycore_bitutils.h index 36ffe23b9ff..0bd3270fe82 100644 --- a/Include/internal/pycore_bitutils.h +++ b/Include/internal/pycore_bitutils.h @@ -7,8 +7,8 @@ - _Py_bswap64(uint64_t) */ -#ifndef Py_INTERNAL_BSWAP_H -#define Py_INTERNAL_BSWAP_H +#ifndef Py_INTERNAL_BITUTILS_H +#define Py_INTERNAL_BITUTILS_H #ifdef __cplusplus extern "C" { #endif @@ -131,8 +131,47 @@ _Py_popcount32(uint32_t x) } +// Return the index of the most significant 1 bit in 'x'. This is the smallest +// integer k such that x < 2**k. Equivalent to floor(log2(x)) + 1 for x != 0. +static inline int +_Py_bit_length(unsigned long x) +{ +#if (defined(__clang__) || defined(__GNUC__)) + if (x != 0) { + // __builtin_clzl() is available since GCC 3.4. + // Undefined behavior for x == 0. + return (int)sizeof(unsigned long) * 8 - __builtin_clzl(x); + } + else { + return 0; + } +#elif defined(_MSC_VER) + // _BitScanReverse() is documented to search 32 bits. + Py_BUILD_ASSERT(sizeof(unsigned long) <= 4); + unsigned long msb; + if (_BitScanReverse(&msb, x)) { + return (int)msb + 1; + } + else { + return 0; + } +#else + const int BIT_LENGTH_TABLE[32] = { + 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 + }; + int msb = 0; + while (x >= 32) { + msb += 6; + x >>= 6; + } + msb += BIT_LENGTH_TABLE[x]; + return msb; +#endif +} + + #ifdef __cplusplus } #endif -#endif /* !Py_INTERNAL_BSWAP_H */ - +#endif /* !Py_INTERNAL_BITUTILS_H */ diff --git a/Include/pymath.h b/Include/pymath.h index 63ca972784e..f869724334a 100644 --- a/Include/pymath.h +++ b/Include/pymath.h @@ -227,12 +227,4 @@ PyAPI_FUNC(void) _Py_set_387controlword(unsigned short); * behavior. */ #define _Py_InIntegralTypeRange(type, v) (_Py_IntegralTypeMin(type) <= v && v <= _Py_IntegralTypeMax(type)) -/* Return the smallest integer k such that n < 2**k, or 0 if n == 0. - * Equivalent to floor(log2(x))+1. Also equivalent to: bitwidth_of_type - - * count_leading_zero_bits(x) - */ -#ifndef Py_LIMITED_API -PyAPI_FUNC(unsigned int) _Py_bit_length(unsigned long d); -#endif - #endif /* Py_PYMATH_H */ diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 6d5af5917f1..7970e2f4f44 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -102,6 +102,45 @@ test_popcount(PyObject *self, PyObject *Py_UNUSED(args)) } +static int +check_bit_length(unsigned long x, int expected) +{ + // Use volatile to prevent the compiler to optimize out the whole test + volatile unsigned long u = x; + int len = _Py_bit_length(u); + if (len != expected) { + PyErr_Format(PyExc_AssertionError, + "_Py_bit_length(%lu) returns %i, expected %i", + x, len, expected); + return -1; + } + return 0; +} + + +static PyObject* +test_bit_length(PyObject *self, PyObject *Py_UNUSED(args)) +{ +#define CHECK(X, RESULT) \ + do { \ + if (check_bit_length(X, RESULT) < 0) { \ + return NULL; \ + } \ + } while (0) + + CHECK(0, 0); + CHECK(1, 1); + CHECK(0x1000, 13); + CHECK(0x1234, 13); + CHECK(0x54321, 19); + CHECK(0x7FFFFFFF, 31); + CHECK(0xFFFFFFFF, 32); + Py_RETURN_NONE; + +#undef CHECK +} + + #define TO_PTR(ch) ((void*)(uintptr_t)ch) #define FROM_PTR(ptr) ((uintptr_t)ptr) #define VALUE(key) (1 + ((int)(key) - 'a')) @@ -197,6 +236,7 @@ static PyMethodDef TestMethods[] = { {"get_recursion_depth", get_recursion_depth, METH_NOARGS}, {"test_bswap", test_bswap, METH_NOARGS}, {"test_popcount", test_popcount, METH_NOARGS}, + {"test_bit_length", test_bit_length, METH_NOARGS}, {"test_hashtable", test_hashtable, METH_NOARGS}, {NULL, NULL} /* sentinel */ }; diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index cb05ce7c509..4450ce18941 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -53,6 +53,7 @@ raised for division by zero and mod by zero. */ #include "Python.h" +#include "pycore_bitutils.h" // _Py_bit_length() #include "pycore_dtoa.h" #include "_math.h" diff --git a/Objects/longobject.c b/Objects/longobject.c index dead3e30694..d92a9c56a72 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -695,6 +695,13 @@ _PyLong_Sign(PyObject *vv) return Py_SIZE(v) == 0 ? 0 : (Py_SIZE(v) < 0 ? -1 : 1); } +static int +bit_length_digit(digit x) +{ + Py_BUILD_ASSERT(PyLong_SHIFT <= sizeof(unsigned long) * 8); + return _Py_bit_length((unsigned long)x); +} + size_t _PyLong_NumBits(PyObject *vv) { @@ -712,7 +719,7 @@ _PyLong_NumBits(PyObject *vv) if ((size_t)(ndigits - 1) > SIZE_MAX / (size_t)PyLong_SHIFT) goto Overflow; result = (size_t)(ndigits - 1) * (size_t)PyLong_SHIFT; - msd_bits = _Py_bit_length(msd); + msd_bits = bit_length_digit(msd); if (SIZE_MAX - msd_bits < result) goto Overflow; result += msd_bits; @@ -1822,7 +1829,7 @@ long_format_binary(PyObject *aa, int base, int alternate, return -1; } size_a_in_bits = (size_a - 1) * PyLong_SHIFT + - _Py_bit_length(a->ob_digit[size_a - 1]); + bit_length_digit(a->ob_digit[size_a - 1]); /* Allow 1 character for a '-' sign. */ sz = negative + (size_a_in_bits + (bits - 1)) / bits; } @@ -2642,7 +2649,7 @@ x_divrem(PyLongObject *v1, PyLongObject *w1, PyLongObject **prem) /* normalize: shift w1 left so that its top digit is >= PyLong_BASE/2. shift v1 left by the same amount. Results go into w and v. */ - d = PyLong_SHIFT - _Py_bit_length(w1->ob_digit[size_w-1]); + d = PyLong_SHIFT - bit_length_digit(w1->ob_digit[size_w-1]); carry = v_lshift(w->ob_digit, w1->ob_digit, size_w, d); assert(carry == 0); carry = v_lshift(v->ob_digit, v1->ob_digit, size_v, d); @@ -2764,7 +2771,7 @@ _PyLong_Frexp(PyLongObject *a, Py_ssize_t *e) *e = 0; return 0.0; } - a_bits = _Py_bit_length(a->ob_digit[a_size-1]); + a_bits = bit_length_digit(a->ob_digit[a_size-1]); /* The following is an overflow-free version of the check "if ((a_size - 1) * PyLong_SHIFT + a_bits > PY_SSIZE_T_MAX) ..." */ if (a_size >= (PY_SSIZE_T_MAX - 1) / PyLong_SHIFT + 1 && @@ -3857,8 +3864,8 @@ long_true_divide(PyObject *v, PyObject *w) /* Extreme underflow */ goto underflow_or_zero; /* Next line is now safe from overflowing a Py_ssize_t */ - diff = diff * PyLong_SHIFT + _Py_bit_length(a->ob_digit[a_size - 1]) - - _Py_bit_length(b->ob_digit[b_size - 1]); + diff = diff * PyLong_SHIFT + bit_length_digit(a->ob_digit[a_size - 1]) - + bit_length_digit(b->ob_digit[b_size - 1]); /* Now diff = a_bits - b_bits. */ if (diff > DBL_MAX_EXP) goto overflow; @@ -3934,7 +3941,7 @@ long_true_divide(PyObject *v, PyObject *w) } x_size = Py_ABS(Py_SIZE(x)); assert(x_size > 0); /* result of division is never zero */ - x_bits = (x_size-1)*PyLong_SHIFT+_Py_bit_length(x->ob_digit[x_size-1]); + x_bits = (x_size-1)*PyLong_SHIFT+bit_length_digit(x->ob_digit[x_size-1]); /* The number of extra bits that have to be rounded away. */ extra_bits = Py_MAX(x_bits, DBL_MIN_EXP - shift) - DBL_MANT_DIG; @@ -4748,7 +4755,7 @@ _PyLong_GCD(PyObject *aarg, PyObject *barg) alloc_b = Py_SIZE(b); /* reduce until a fits into 2 digits */ while ((size_a = Py_SIZE(a)) > 2) { - nbits = _Py_bit_length(a->ob_digit[size_a-1]); + nbits = bit_length_digit(a->ob_digit[size_a-1]); /* extract top 2*PyLong_SHIFT bits of a into x, along with corresponding bits of b into y */ size_b = Py_SIZE(b); @@ -5269,7 +5276,7 @@ int_bit_length_impl(PyObject *self) return PyLong_FromLong(0); msd = ((PyLongObject *)self)->ob_digit[ndigits-1]; - msd_bits = _Py_bit_length(msd); + msd_bits = bit_length_digit(msd); if (ndigits <= PY_SSIZE_T_MAX/PyLong_SHIFT) return PyLong_FromSsize_t((ndigits-1)*PyLong_SHIFT + msd_bits); diff --git a/Python/pymath.c b/Python/pymath.c index a08a0e79615..24b804223ee 100644 --- a/Python/pymath.c +++ b/Python/pymath.c @@ -79,18 +79,3 @@ round(double x) return copysign(y, x); } #endif /* HAVE_ROUND */ - -static const unsigned int BitLengthTable[32] = { - 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 -}; - -unsigned int _Py_bit_length(unsigned long d) { - unsigned int d_bits = 0; - while (d >= 32) { - d_bits += 6; - d >>= 6; - } - d_bits += BitLengthTable[d]; - return d_bits; -}