mirror of https://github.com/python/cpython
bpo-37295: Use constant-time comb() and perm() for larger n depending on k (GH-30305)
This commit is contained in:
parent
5c66414b55
commit
2d787971c6
|
@ -1889,8 +1889,8 @@ class MathTests(unittest.TestCase):
|
||||||
perm = math.perm
|
perm = math.perm
|
||||||
factorial = math.factorial
|
factorial = math.factorial
|
||||||
# Test if factorial definition is satisfied
|
# Test if factorial definition is satisfied
|
||||||
for n in range(100):
|
for n in range(500):
|
||||||
for k in range(n + 1):
|
for k in (range(n + 1) if n < 100 else range(30) if n < 200 else range(10)):
|
||||||
self.assertEqual(perm(n, k),
|
self.assertEqual(perm(n, k),
|
||||||
factorial(n) // factorial(n - k))
|
factorial(n) // factorial(n - k))
|
||||||
|
|
||||||
|
@ -1953,8 +1953,8 @@ class MathTests(unittest.TestCase):
|
||||||
comb = math.comb
|
comb = math.comb
|
||||||
factorial = math.factorial
|
factorial = math.factorial
|
||||||
# Test if factorial definition is satisfied
|
# Test if factorial definition is satisfied
|
||||||
for n in range(100):
|
for n in range(500):
|
||||||
for k in range(n + 1):
|
for k in (range(n + 1) if n < 100 else range(30) if n < 200 else range(10)):
|
||||||
self.assertEqual(comb(n, k), factorial(n)
|
self.assertEqual(comb(n, k), factorial(n)
|
||||||
// (factorial(k) * factorial(n - k)))
|
// (factorial(k) * factorial(n - k)))
|
||||||
|
|
||||||
|
|
|
@ -3225,6 +3225,123 @@ math_prod_impl(PyObject *module, PyObject *iterable, PyObject *start)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* least significant 64 bits of the odd part of factorial(n), for n in range(128).
|
||||||
|
|
||||||
|
Python code to generate the values:
|
||||||
|
|
||||||
|
import math
|
||||||
|
|
||||||
|
for n in range(128):
|
||||||
|
fac = math.factorial(n)
|
||||||
|
fac_odd_part = fac // (fac & -fac)
|
||||||
|
reduced_fac_odd_part = fac_odd_part % (2**64)
|
||||||
|
print(f"{reduced_fac_odd_part:#018x}u")
|
||||||
|
*/
|
||||||
|
static const uint64_t reduced_factorial_odd_part[] = {
|
||||||
|
0x0000000000000001u, 0x0000000000000001u, 0x0000000000000001u, 0x0000000000000003u,
|
||||||
|
0x0000000000000003u, 0x000000000000000fu, 0x000000000000002du, 0x000000000000013bu,
|
||||||
|
0x000000000000013bu, 0x0000000000000b13u, 0x000000000000375fu, 0x0000000000026115u,
|
||||||
|
0x000000000007233fu, 0x00000000005cca33u, 0x0000000002898765u, 0x00000000260eeeebu,
|
||||||
|
0x00000000260eeeebu, 0x0000000286fddd9bu, 0x00000016beecca73u, 0x000001b02b930689u,
|
||||||
|
0x00000870d9df20adu, 0x0000b141df4dae31u, 0x00079dd498567c1bu, 0x00af2e19afc5266du,
|
||||||
|
0x020d8a4d0f4f7347u, 0x335281867ec241efu, 0x9b3093d46fdd5923u, 0x5e1f9767cc5866b1u,
|
||||||
|
0x92dd23d6966aced7u, 0xa30d0f4f0a196e5bu, 0x8dc3e5a1977d7755u, 0x2ab8ce915831734bu,
|
||||||
|
0x2ab8ce915831734bu, 0x81d2a0bc5e5fdcabu, 0x9efcac82445da75bu, 0xbc8b95cf58cde171u,
|
||||||
|
0xa0e8444a1f3cecf9u, 0x4191deb683ce3ffdu, 0xddd3878bc84ebfc7u, 0xcb39a64b83ff3751u,
|
||||||
|
0xf8203f7993fc1495u, 0xbd2a2a78b35f4bddu, 0x84757be6b6d13921u, 0x3fbbcfc0b524988bu,
|
||||||
|
0xbd11ed47c8928df9u, 0x3c26b59e41c2f4c5u, 0x677a5137e883fdb3u, 0xff74e943b03b93ddu,
|
||||||
|
0xfe5ebbcb10b2bb97u, 0xb021f1de3235e7e7u, 0x33509eb2e743a58fu, 0x390f9da41279fb7du,
|
||||||
|
0xe5cb0154f031c559u, 0x93074695ba4ddb6du, 0x81c471caa636247fu, 0xe1347289b5a1d749u,
|
||||||
|
0x286f21c3f76ce2ffu, 0x00be84a2173e8ac7u, 0x1595065ca215b88bu, 0xf95877595b018809u,
|
||||||
|
0x9c2efe3c5516f887u, 0x373294604679382bu, 0xaf1ff7a888adcd35u, 0x18ddf279a2c5800bu,
|
||||||
|
0x18ddf279a2c5800bu, 0x505a90e2542582cbu, 0x5bacad2cd8d5dc2bu, 0xfe3152bcbff89f41u,
|
||||||
|
0xe1467e88bf829351u, 0xb8001adb9e31b4d5u, 0x2803ac06a0cbb91fu, 0x1904b5d698805799u,
|
||||||
|
0xe12a648b5c831461u, 0x3516abbd6160cfa9u, 0xac46d25f12fe036du, 0x78bfa1da906b00efu,
|
||||||
|
0xf6390338b7f111bdu, 0x0f25f80f538255d9u, 0x4ec8ca55b8db140fu, 0x4ff670740b9b30a1u,
|
||||||
|
0x8fd032443a07f325u, 0x80dfe7965c83eeb5u, 0xa3dc1714d1213afdu, 0x205b7bbfcdc62007u,
|
||||||
|
0xa78126bbe140a093u, 0x9de1dc61ca7550cfu, 0x84f0046d01b492c5u, 0x2d91810b945de0f3u,
|
||||||
|
0xf5408b7f6008aa71u, 0x43707f4863034149u, 0xdac65fb9679279d5u, 0xc48406e7d1114eb7u,
|
||||||
|
0xa7dc9ed3c88e1271u, 0xfb25b2efdb9cb30du, 0x1bebda0951c4df63u, 0x5c85e975580ee5bdu,
|
||||||
|
0x1591bc60082cb137u, 0x2c38606318ef25d7u, 0x76ca72f7c5c63e27u, 0xf04a75d17baa0915u,
|
||||||
|
0x77458175139ae30du, 0x0e6c1330bc1b9421u, 0xdf87d2b5797e8293u, 0xefa5c703e1e68925u,
|
||||||
|
0x2b6b1b3278b4f6e1u, 0xceee27b382394249u, 0xd74e3829f5dab91du, 0xfdb17989c26b5f1fu,
|
||||||
|
0xc1b7d18781530845u, 0x7b4436b2105a8561u, 0x7ba7c0418372a7d7u, 0x9dbc5c67feb6c639u,
|
||||||
|
0x502686d7f6ff6b8fu, 0x6101855406be7a1fu, 0x9956afb5806930e7u, 0xe1f0ee88af40f7c5u,
|
||||||
|
0x984b057bda5c1151u, 0x9a49819acc13ea05u, 0x8ef0dead0896ef27u, 0x71f7826efe292b21u,
|
||||||
|
0xad80a480e46986efu, 0x01cdc0ebf5e0c6f7u, 0x6e06f839968f68dbu, 0xdd5943ab56e76139u,
|
||||||
|
0xcdcf31bf8604c5e7u, 0x7e2b4a847054a1cbu, 0x0ca75697a4d3d0f5u, 0x4703f53ac514a98bu,
|
||||||
|
};
|
||||||
|
|
||||||
|
/* inverses of reduced_factorial_odd_part values modulo 2**64.
|
||||||
|
|
||||||
|
Python code to generate the values:
|
||||||
|
|
||||||
|
import math
|
||||||
|
|
||||||
|
for n in range(128):
|
||||||
|
fac = math.factorial(n)
|
||||||
|
fac_odd_part = fac // (fac & -fac)
|
||||||
|
inverted_fac_odd_part = pow(fac_odd_part, -1, 2**64)
|
||||||
|
print(f"{inverted_fac_odd_part:#018x}u")
|
||||||
|
*/
|
||||||
|
static const uint64_t inverted_factorial_odd_part[] = {
|
||||||
|
0x0000000000000001u, 0x0000000000000001u, 0x0000000000000001u, 0xaaaaaaaaaaaaaaabu,
|
||||||
|
0xaaaaaaaaaaaaaaabu, 0xeeeeeeeeeeeeeeefu, 0x4fa4fa4fa4fa4fa5u, 0x2ff2ff2ff2ff2ff3u,
|
||||||
|
0x2ff2ff2ff2ff2ff3u, 0x938cc70553e3771bu, 0xb71c27cddd93e49fu, 0xb38e3229fcdee63du,
|
||||||
|
0xe684bb63544a4cbfu, 0xc2f684917ca340fbu, 0xf747c9cba417526du, 0xbb26eb51d7bd49c3u,
|
||||||
|
0xbb26eb51d7bd49c3u, 0xb0a7efb985294093u, 0xbe4b8c69f259eabbu, 0x6854d17ed6dc4fb9u,
|
||||||
|
0xe1aa904c915f4325u, 0x3b8206df131cead1u, 0x79c6009fea76fe13u, 0xd8c5d381633cd365u,
|
||||||
|
0x4841f12b21144677u, 0x4a91ff68200b0d0fu, 0x8f9513a58c4f9e8bu, 0x2b3e690621a42251u,
|
||||||
|
0x4f520f00e03c04e7u, 0x2edf84ee600211d3u, 0xadcaa2764aaacdfdu, 0x161f4f9033f4fe63u,
|
||||||
|
0x161f4f9033f4fe63u, 0xbada2932ea4d3e03u, 0xcec189f3efaa30d3u, 0xf7475bb68330bf91u,
|
||||||
|
0x37eb7bf7d5b01549u, 0x46b35660a4e91555u, 0xa567c12d81f151f7u, 0x4c724007bb2071b1u,
|
||||||
|
0x0f4a0cce58a016bdu, 0xfa21068e66106475u, 0x244ab72b5a318ae1u, 0x366ce67e080d0f23u,
|
||||||
|
0xd666fdae5dd2a449u, 0xd740ddd0acc06a0du, 0xb050bbbb28e6f97bu, 0x70b003fe890a5c75u,
|
||||||
|
0xd03aabff83037427u, 0x13ec4ca72c783bd7u, 0x90282c06afdbd96fu, 0x4414ddb9db4a95d5u,
|
||||||
|
0xa2c68735ae6832e9u, 0xbf72d71455676665u, 0xa8469fab6b759b7fu, 0xc1e55b56e606caf9u,
|
||||||
|
0x40455630fc4a1cffu, 0x0120a7b0046d16f7u, 0xa7c3553b08faef23u, 0x9f0bfd1b08d48639u,
|
||||||
|
0xa433ffce9a304d37u, 0xa22ad1d53915c683u, 0xcb6cbc723ba5dd1du, 0x547fb1b8ab9d0ba3u,
|
||||||
|
0x547fb1b8ab9d0ba3u, 0x8f15a826498852e3u, 0x32e1a03f38880283u, 0x3de4cce63283f0c1u,
|
||||||
|
0x5dfe6667e4da95b1u, 0xfda6eeeef479e47du, 0xf14de991cc7882dfu, 0xe68db79247630ca9u,
|
||||||
|
0xa7d6db8207ee8fa1u, 0x255e1f0fcf034499u, 0xc9a8990e43dd7e65u, 0x3279b6f289702e0fu,
|
||||||
|
0xe7b5905d9b71b195u, 0x03025ba41ff0da69u, 0xb7df3d6d3be55aefu, 0xf89b212ebff2b361u,
|
||||||
|
0xfe856d095996f0adu, 0xd6e533e9fdf20f9du, 0xf8c0e84a63da3255u, 0xa677876cd91b4db7u,
|
||||||
|
0x07ed4f97780d7d9bu, 0x90a8705f258db62fu, 0xa41bbb2be31b1c0du, 0x6ec28690b038383bu,
|
||||||
|
0xdb860c3bb2edd691u, 0x0838286838a980f9u, 0x558417a74b36f77du, 0x71779afc3646ef07u,
|
||||||
|
0x743cda377ccb6e91u, 0x7fdf9f3fe89153c5u, 0xdc97d25df49b9a4bu, 0x76321a778eb37d95u,
|
||||||
|
0x7cbb5e27da3bd487u, 0x9cff4ade1a009de7u, 0x70eb166d05c15197u, 0xdcf0460b71d5fe3du,
|
||||||
|
0x5ac1ee5260b6a3c5u, 0xc922dedfdd78efe1u, 0xe5d381dc3b8eeb9bu, 0xd57e5347bafc6aadu,
|
||||||
|
0x86939040983acd21u, 0x395b9d69740a4ff9u, 0x1467299c8e43d135u, 0x5fe440fcad975cdfu,
|
||||||
|
0xcaa9a39794a6ca8du, 0xf61dbd640868dea1u, 0xac09d98d74843be7u, 0x2b103b9e1a6b4809u,
|
||||||
|
0x2ab92d16960f536fu, 0x6653323d5e3681dfu, 0xefd48c1c0624e2d7u, 0xa496fefe04816f0du,
|
||||||
|
0x1754a7b07bbdd7b1u, 0x23353c829a3852cdu, 0xbf831261abd59097u, 0x57a8e656df0618e1u,
|
||||||
|
0x16e9206c3100680fu, 0xadad4c6ee921dac7u, 0x635f2b3860265353u, 0xdd6d0059f44b3d09u,
|
||||||
|
0xac4dd6b894447dd7u, 0x42ea183eeaa87be3u, 0x15612d1550ee5b5du, 0x226fa19d656cb623u,
|
||||||
|
};
|
||||||
|
|
||||||
|
/* exponent of the largest power of 2 dividing factorial(n), for n in range(68)
|
||||||
|
|
||||||
|
Python code to generate the values:
|
||||||
|
|
||||||
|
import math
|
||||||
|
|
||||||
|
for n in range(128):
|
||||||
|
fac = math.factorial(n)
|
||||||
|
fac_trailing_zeros = (fac & -fac).bit_length() - 1
|
||||||
|
print(fac_trailing_zeros)
|
||||||
|
*/
|
||||||
|
|
||||||
|
static const uint8_t factorial_trailing_zeros[] = {
|
||||||
|
0, 0, 1, 1, 3, 3, 4, 4, 7, 7, 8, 8, 10, 10, 11, 11, // 0-15
|
||||||
|
15, 15, 16, 16, 18, 18, 19, 19, 22, 22, 23, 23, 25, 25, 26, 26, // 16-31
|
||||||
|
31, 31, 32, 32, 34, 34, 35, 35, 38, 38, 39, 39, 41, 41, 42, 42, // 32-47
|
||||||
|
46, 46, 47, 47, 49, 49, 50, 50, 53, 53, 54, 54, 56, 56, 57, 57, // 48-63
|
||||||
|
63, 63, 64, 64, 66, 66, 67, 67, 70, 70, 71, 71, 73, 73, 74, 74, // 64-79
|
||||||
|
78, 78, 79, 79, 81, 81, 82, 82, 85, 85, 86, 86, 88, 88, 89, 89, // 80-95
|
||||||
|
94, 94, 95, 95, 97, 97, 98, 98, 101, 101, 102, 102, 104, 104, 105, 105, // 96-111
|
||||||
|
109, 109, 110, 110, 112, 112, 113, 113, 116, 116, 117, 117, 119, 119, 120, 120, // 112-127
|
||||||
|
};
|
||||||
|
|
||||||
/* Number of permutations and combinations.
|
/* Number of permutations and combinations.
|
||||||
* P(n, k) = n! / (n-k)!
|
* P(n, k) = n! / (n-k)!
|
||||||
* C(n, k) = P(n, k) / k!
|
* C(n, k) = P(n, k) / k!
|
||||||
|
@ -3234,48 +3351,93 @@ math_prod_impl(PyObject *module, PyObject *iterable, PyObject *start)
|
||||||
static PyObject *
|
static PyObject *
|
||||||
perm_comb_small(unsigned long long n, unsigned long long k, int iscomb)
|
perm_comb_small(unsigned long long n, unsigned long long k, int iscomb)
|
||||||
{
|
{
|
||||||
/* long long is at least 64 bit */
|
|
||||||
static const unsigned long long fast_comb_limits[] = {
|
|
||||||
0, ULLONG_MAX, 4294967296ULL, 3329022, 102570, 13467, 3612, 1449, // 0-7
|
|
||||||
746, 453, 308, 227, 178, 147, 125, 110, // 8-15
|
|
||||||
99, 90, 84, 79, 75, 72, 69, 68, // 16-23
|
|
||||||
66, 65, 64, 63, 63, 62, 62, 62, // 24-31
|
|
||||||
};
|
|
||||||
static const unsigned long long fast_perm_limits[] = {
|
|
||||||
0, ULLONG_MAX, 4294967296ULL, 2642246, 65537, 7133, 1627, 568, // 0-7
|
|
||||||
259, 142, 88, 61, 45, 36, 30, // 8-14
|
|
||||||
};
|
|
||||||
|
|
||||||
if (k == 0) {
|
if (k == 0) {
|
||||||
return PyLong_FromLong(1);
|
return PyLong_FromLong(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* For small enough n and k the result fits in the 64-bit range and can
|
/* For small enough n and k the result fits in the 64-bit range and can
|
||||||
* be calculated without allocating intermediate PyLong objects. */
|
* be calculated without allocating intermediate PyLong objects. */
|
||||||
if (iscomb
|
|
||||||
? (k < Py_ARRAY_LENGTH(fast_comb_limits)
|
|
||||||
&& n <= fast_comb_limits[k])
|
|
||||||
: (k < Py_ARRAY_LENGTH(fast_perm_limits)
|
|
||||||
&& n <= fast_perm_limits[k]))
|
|
||||||
{
|
|
||||||
unsigned long long result = n;
|
|
||||||
if (iscomb) {
|
if (iscomb) {
|
||||||
|
/* Maps k to the maximal n so that 2*k-1 <= n <= 127 and C(n, k)
|
||||||
|
* fits into a uint64_t. Exclude k = 1, because the second fast
|
||||||
|
* path is faster for this case.*/
|
||||||
|
static const unsigned char fast_comb_limits1[] = {
|
||||||
|
0, 0, 127, 127, 127, 127, 127, 127, // 0-7
|
||||||
|
127, 127, 127, 127, 127, 127, 127, 127, // 8-15
|
||||||
|
116, 105, 97, 91, 86, 82, 78, 76, // 16-23
|
||||||
|
74, 72, 71, 70, 69, 68, 68, 67, // 24-31
|
||||||
|
67, 67, 67, // 32-34
|
||||||
|
};
|
||||||
|
if (k < Py_ARRAY_LENGTH(fast_comb_limits1) && n <= fast_comb_limits1[k]) {
|
||||||
|
/*
|
||||||
|
comb(n, k) fits into a uint64_t. We compute it as
|
||||||
|
|
||||||
|
comb_odd_part << shift
|
||||||
|
|
||||||
|
where 2**shift is the largest power of two dividing comb(n, k)
|
||||||
|
and comb_odd_part is comb(n, k) >> shift. comb_odd_part can be
|
||||||
|
calculated efficiently via arithmetic modulo 2**64, using three
|
||||||
|
lookups and two uint64_t multiplications.
|
||||||
|
*/
|
||||||
|
uint64_t comb_odd_part = reduced_factorial_odd_part[n]
|
||||||
|
* inverted_factorial_odd_part[k]
|
||||||
|
* inverted_factorial_odd_part[n - k];
|
||||||
|
int shift = factorial_trailing_zeros[n]
|
||||||
|
- factorial_trailing_zeros[k]
|
||||||
|
- factorial_trailing_zeros[n - k];
|
||||||
|
return PyLong_FromUnsignedLongLong(comb_odd_part << shift);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Maps k to the maximal n so that 2*k-1 <= n <= 127 and C(n, k)*k
|
||||||
|
* fits into a long long (which is at least 64 bit). Only contains
|
||||||
|
* items larger than in fast_comb_limits1. */
|
||||||
|
static const unsigned long long fast_comb_limits2[] = {
|
||||||
|
0, ULLONG_MAX, 4294967296ULL, 3329022, 102570, 13467, 3612, 1449, // 0-7
|
||||||
|
746, 453, 308, 227, 178, 147, // 8-13
|
||||||
|
};
|
||||||
|
if (k < Py_ARRAY_LENGTH(fast_comb_limits2) && n <= fast_comb_limits2[k]) {
|
||||||
|
/* C(n, k) = C(n, k-1) * (n-k+1) / k */
|
||||||
|
unsigned long long result = n;
|
||||||
for (unsigned long long i = 1; i < k;) {
|
for (unsigned long long i = 1; i < k;) {
|
||||||
result *= --n;
|
result *= --n;
|
||||||
result /= ++i;
|
result /= ++i;
|
||||||
}
|
}
|
||||||
|
return PyLong_FromUnsignedLongLong(result);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
/* Maps k to the maximal n so that k <= n and P(n, k)
|
||||||
|
* fits into a long long (which is at least 64 bit). */
|
||||||
|
static const unsigned long long fast_perm_limits[] = {
|
||||||
|
0, ULLONG_MAX, 4294967296ULL, 2642246, 65537, 7133, 1627, 568, // 0-7
|
||||||
|
259, 142, 88, 61, 45, 36, 30, 26, // 8-15
|
||||||
|
24, 22, 21, 20, 20, // 16-20
|
||||||
|
};
|
||||||
|
if (k < Py_ARRAY_LENGTH(fast_perm_limits) && n <= fast_perm_limits[k]) {
|
||||||
|
if (n <= 127) {
|
||||||
|
/* P(n, k) fits into a uint64_t. */
|
||||||
|
uint64_t perm_odd_part = reduced_factorial_odd_part[n]
|
||||||
|
* inverted_factorial_odd_part[n - k];
|
||||||
|
int shift = factorial_trailing_zeros[n]
|
||||||
|
- factorial_trailing_zeros[n - k];
|
||||||
|
return PyLong_FromUnsignedLongLong(perm_odd_part << shift);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* P(n, k) = P(n, k-1) * (n-k+1) */
|
||||||
|
unsigned long long result = n;
|
||||||
for (unsigned long long i = 1; i < k;) {
|
for (unsigned long long i = 1; i < k;) {
|
||||||
result *= --n;
|
result *= --n;
|
||||||
++i;
|
++i;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
return PyLong_FromUnsignedLongLong(result);
|
return PyLong_FromUnsignedLongLong(result);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* For larger n use recursive formula. */
|
/* For larger n use recursive formulas:
|
||||||
/* C(n, k) = C(n, j) * C(n-j, k-j) // C(k, j) */
|
*
|
||||||
|
* P(n, k) = P(n, j) * P(n-j, k-j)
|
||||||
|
* C(n, k) = C(n, j) * C(n-j, k-j) // C(k, j)
|
||||||
|
*/
|
||||||
unsigned long long j = k / 2;
|
unsigned long long j = k / 2;
|
||||||
PyObject *a, *b;
|
PyObject *a, *b;
|
||||||
a = perm_comb_small(n, j, iscomb);
|
a = perm_comb_small(n, j, iscomb);
|
||||||
|
@ -3450,90 +3612,6 @@ error:
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* least significant 64 bits of the odd part of factorial(n), for n in range(68).
|
|
||||||
|
|
||||||
Python code to generate the values:
|
|
||||||
|
|
||||||
import math
|
|
||||||
|
|
||||||
for n in range(68):
|
|
||||||
fac = math.factorial(n)
|
|
||||||
fac_odd_part = fac // (fac & -fac)
|
|
||||||
reduced_fac_odd_part = fac_odd_part % (2**64)
|
|
||||||
print(f"{reduced_fac_odd_part:#018x}u")
|
|
||||||
*/
|
|
||||||
static const uint64_t reduced_factorial_odd_part[] = {
|
|
||||||
0x0000000000000001u, 0x0000000000000001u, 0x0000000000000001u, 0x0000000000000003u,
|
|
||||||
0x0000000000000003u, 0x000000000000000fu, 0x000000000000002du, 0x000000000000013bu,
|
|
||||||
0x000000000000013bu, 0x0000000000000b13u, 0x000000000000375fu, 0x0000000000026115u,
|
|
||||||
0x000000000007233fu, 0x00000000005cca33u, 0x0000000002898765u, 0x00000000260eeeebu,
|
|
||||||
0x00000000260eeeebu, 0x0000000286fddd9bu, 0x00000016beecca73u, 0x000001b02b930689u,
|
|
||||||
0x00000870d9df20adu, 0x0000b141df4dae31u, 0x00079dd498567c1bu, 0x00af2e19afc5266du,
|
|
||||||
0x020d8a4d0f4f7347u, 0x335281867ec241efu, 0x9b3093d46fdd5923u, 0x5e1f9767cc5866b1u,
|
|
||||||
0x92dd23d6966aced7u, 0xa30d0f4f0a196e5bu, 0x8dc3e5a1977d7755u, 0x2ab8ce915831734bu,
|
|
||||||
0x2ab8ce915831734bu, 0x81d2a0bc5e5fdcabu, 0x9efcac82445da75bu, 0xbc8b95cf58cde171u,
|
|
||||||
0xa0e8444a1f3cecf9u, 0x4191deb683ce3ffdu, 0xddd3878bc84ebfc7u, 0xcb39a64b83ff3751u,
|
|
||||||
0xf8203f7993fc1495u, 0xbd2a2a78b35f4bddu, 0x84757be6b6d13921u, 0x3fbbcfc0b524988bu,
|
|
||||||
0xbd11ed47c8928df9u, 0x3c26b59e41c2f4c5u, 0x677a5137e883fdb3u, 0xff74e943b03b93ddu,
|
|
||||||
0xfe5ebbcb10b2bb97u, 0xb021f1de3235e7e7u, 0x33509eb2e743a58fu, 0x390f9da41279fb7du,
|
|
||||||
0xe5cb0154f031c559u, 0x93074695ba4ddb6du, 0x81c471caa636247fu, 0xe1347289b5a1d749u,
|
|
||||||
0x286f21c3f76ce2ffu, 0x00be84a2173e8ac7u, 0x1595065ca215b88bu, 0xf95877595b018809u,
|
|
||||||
0x9c2efe3c5516f887u, 0x373294604679382bu, 0xaf1ff7a888adcd35u, 0x18ddf279a2c5800bu,
|
|
||||||
0x18ddf279a2c5800bu, 0x505a90e2542582cbu, 0x5bacad2cd8d5dc2bu, 0xfe3152bcbff89f41u,
|
|
||||||
};
|
|
||||||
|
|
||||||
/* inverses of reduced_factorial_odd_part values modulo 2**64.
|
|
||||||
|
|
||||||
Python code to generate the values:
|
|
||||||
|
|
||||||
import math
|
|
||||||
|
|
||||||
for n in range(68):
|
|
||||||
fac = math.factorial(n)
|
|
||||||
fac_odd_part = fac // (fac & -fac)
|
|
||||||
inverted_fac_odd_part = pow(fac_odd_part, -1, 2**64)
|
|
||||||
print(f"{inverted_fac_odd_part:#018x}u")
|
|
||||||
*/
|
|
||||||
static const uint64_t inverted_factorial_odd_part[] = {
|
|
||||||
0x0000000000000001u, 0x0000000000000001u, 0x0000000000000001u, 0xaaaaaaaaaaaaaaabu,
|
|
||||||
0xaaaaaaaaaaaaaaabu, 0xeeeeeeeeeeeeeeefu, 0x4fa4fa4fa4fa4fa5u, 0x2ff2ff2ff2ff2ff3u,
|
|
||||||
0x2ff2ff2ff2ff2ff3u, 0x938cc70553e3771bu, 0xb71c27cddd93e49fu, 0xb38e3229fcdee63du,
|
|
||||||
0xe684bb63544a4cbfu, 0xc2f684917ca340fbu, 0xf747c9cba417526du, 0xbb26eb51d7bd49c3u,
|
|
||||||
0xbb26eb51d7bd49c3u, 0xb0a7efb985294093u, 0xbe4b8c69f259eabbu, 0x6854d17ed6dc4fb9u,
|
|
||||||
0xe1aa904c915f4325u, 0x3b8206df131cead1u, 0x79c6009fea76fe13u, 0xd8c5d381633cd365u,
|
|
||||||
0x4841f12b21144677u, 0x4a91ff68200b0d0fu, 0x8f9513a58c4f9e8bu, 0x2b3e690621a42251u,
|
|
||||||
0x4f520f00e03c04e7u, 0x2edf84ee600211d3u, 0xadcaa2764aaacdfdu, 0x161f4f9033f4fe63u,
|
|
||||||
0x161f4f9033f4fe63u, 0xbada2932ea4d3e03u, 0xcec189f3efaa30d3u, 0xf7475bb68330bf91u,
|
|
||||||
0x37eb7bf7d5b01549u, 0x46b35660a4e91555u, 0xa567c12d81f151f7u, 0x4c724007bb2071b1u,
|
|
||||||
0x0f4a0cce58a016bdu, 0xfa21068e66106475u, 0x244ab72b5a318ae1u, 0x366ce67e080d0f23u,
|
|
||||||
0xd666fdae5dd2a449u, 0xd740ddd0acc06a0du, 0xb050bbbb28e6f97bu, 0x70b003fe890a5c75u,
|
|
||||||
0xd03aabff83037427u, 0x13ec4ca72c783bd7u, 0x90282c06afdbd96fu, 0x4414ddb9db4a95d5u,
|
|
||||||
0xa2c68735ae6832e9u, 0xbf72d71455676665u, 0xa8469fab6b759b7fu, 0xc1e55b56e606caf9u,
|
|
||||||
0x40455630fc4a1cffu, 0x0120a7b0046d16f7u, 0xa7c3553b08faef23u, 0x9f0bfd1b08d48639u,
|
|
||||||
0xa433ffce9a304d37u, 0xa22ad1d53915c683u, 0xcb6cbc723ba5dd1du, 0x547fb1b8ab9d0ba3u,
|
|
||||||
0x547fb1b8ab9d0ba3u, 0x8f15a826498852e3u, 0x32e1a03f38880283u, 0x3de4cce63283f0c1u,
|
|
||||||
};
|
|
||||||
|
|
||||||
/* exponent of the largest power of 2 dividing factorial(n), for n in range(68)
|
|
||||||
|
|
||||||
Python code to generate the values:
|
|
||||||
|
|
||||||
import math
|
|
||||||
|
|
||||||
for n in range(68):
|
|
||||||
fac = math.factorial(n)
|
|
||||||
fac_trailing_zeros = (fac & -fac).bit_length() - 1
|
|
||||||
print(fac_trailing_zeros)
|
|
||||||
*/
|
|
||||||
|
|
||||||
static const uint8_t factorial_trailing_zeros[] = {
|
|
||||||
0, 0, 1, 1, 3, 3, 4, 4, 7, 7, 8, 8, 10, 10, 11, 11, // 0-15
|
|
||||||
15, 15, 16, 16, 18, 18, 19, 19, 22, 22, 23, 23, 25, 25, 26, 26, // 16-31
|
|
||||||
31, 31, 32, 32, 34, 34, 35, 35, 38, 38, 39, 39, 41, 41, 42, 42, // 32-47
|
|
||||||
46, 46, 47, 47, 49, 49, 50, 50, 53, 53, 54, 54, 56, 56, 57, 57, // 48-63
|
|
||||||
63, 63, 64, 64, // 64-67
|
|
||||||
};
|
|
||||||
|
|
||||||
/*[clinic input]
|
/*[clinic input]
|
||||||
math.comb
|
math.comb
|
||||||
|
|
||||||
|
@ -3597,28 +3675,6 @@ math_comb_impl(PyObject *module, PyObject *n, PyObject *k)
|
||||||
}
|
}
|
||||||
assert(ki >= 0);
|
assert(ki >= 0);
|
||||||
|
|
||||||
if (ni <= 67) {
|
|
||||||
/*
|
|
||||||
For 0 <= k <= n <= 67, comb(n, k) always fits into a uint64_t.
|
|
||||||
We compute it as
|
|
||||||
|
|
||||||
comb_odd_part << shift
|
|
||||||
|
|
||||||
where 2**shift is the largest power of two dividing comb(n, k)
|
|
||||||
and comb_odd_part is comb(n, k) >> shift. comb_odd_part can be
|
|
||||||
calculated efficiently via arithmetic modulo 2**64, using three
|
|
||||||
lookups and two uint64_t multiplications.
|
|
||||||
*/
|
|
||||||
uint64_t comb_odd_part = reduced_factorial_odd_part[ni]
|
|
||||||
* inverted_factorial_odd_part[ki]
|
|
||||||
* inverted_factorial_odd_part[ni - ki];
|
|
||||||
int shift = factorial_trailing_zeros[ni]
|
|
||||||
- factorial_trailing_zeros[ki]
|
|
||||||
- factorial_trailing_zeros[ni - ki];
|
|
||||||
result = PyLong_FromUnsignedLongLong(comb_odd_part << shift);
|
|
||||||
goto done;
|
|
||||||
}
|
|
||||||
|
|
||||||
ki = Py_MIN(ki, ni - ki);
|
ki = Py_MIN(ki, ni - ki);
|
||||||
if (ki > 1) {
|
if (ki > 1) {
|
||||||
result = perm_comb_small((unsigned long long)ni,
|
result = perm_comb_small((unsigned long long)ni,
|
||||||
|
|
Loading…
Reference in New Issue