mirror of https://github.com/python/cpython
128 lines
2.9 KiB
Plaintext
128 lines
2.9 KiB
Plaintext
|
|
|
|
(* Copyright (c) 2011 Stefan Krah. All rights reserved. *)
|
|
|
|
|
|
==========================================================================
|
|
Calculate (a * b) % p using special primes
|
|
==========================================================================
|
|
|
|
A description of the algorithm can be found in the apfloat manual by
|
|
Tommila [1].
|
|
|
|
|
|
Definitions:
|
|
------------
|
|
|
|
In the whole document, "==" stands for "is congruent with".
|
|
|
|
Result of a * b in terms of high/low words:
|
|
|
|
(1) hi * 2**64 + lo = a * b
|
|
|
|
Special primes:
|
|
|
|
(2) p = 2**64 - z + 1, where z = 2**n
|
|
|
|
Single step modular reduction:
|
|
|
|
(3) R(hi, lo) = hi * z - hi + lo
|
|
|
|
|
|
Strategy:
|
|
---------
|
|
|
|
a) Set (hi, lo) to the result of a * b.
|
|
|
|
b) Set (hi', lo') to the result of R(hi, lo).
|
|
|
|
c) Repeat step b) until 0 <= hi' * 2**64 + lo' < 2*p.
|
|
|
|
d) If the result is less than p, return lo'. Otherwise return lo' - p.
|
|
|
|
|
|
The reduction step b) preserves congruence:
|
|
-------------------------------------------
|
|
|
|
hi * 2**64 + lo == hi * z - hi + lo (mod p)
|
|
|
|
Proof:
|
|
~~~~~~
|
|
|
|
hi * 2**64 + lo = (2**64 - z + 1) * hi + z * hi - hi + lo
|
|
|
|
= p * hi + z * hi - hi + lo
|
|
|
|
== z * hi - hi + lo (mod p)
|
|
|
|
|
|
Maximum numbers of step b):
|
|
---------------------------
|
|
|
|
# To avoid unnecessary formalism, define:
|
|
|
|
def R(hi, lo, z):
|
|
return divmod(hi * z - hi + lo, 2**64)
|
|
|
|
# For simplicity, assume hi=2**64-1, lo=2**64-1 after the
|
|
# initial multiplication a * b. This is of course impossible
|
|
# but certainly covers all cases.
|
|
|
|
# Then, for p1:
|
|
hi=2**64-1; lo=2**64-1; z=2**32
|
|
p1 = 2**64 - z + 1
|
|
|
|
hi, lo = R(hi, lo, z) # First reduction
|
|
hi, lo = R(hi, lo, z) # Second reduction
|
|
hi * 2**64 + lo < 2 * p1 # True
|
|
|
|
# For p2:
|
|
hi=2**64-1; lo=2**64-1; z=2**34
|
|
p2 = 2**64 - z + 1
|
|
|
|
hi, lo = R(hi, lo, z) # First reduction
|
|
hi, lo = R(hi, lo, z) # Second reduction
|
|
hi, lo = R(hi, lo, z) # Third reduction
|
|
hi * 2**64 + lo < 2 * p2 # True
|
|
|
|
# For p3:
|
|
hi=2**64-1; lo=2**64-1; z=2**40
|
|
p3 = 2**64 - z + 1
|
|
|
|
hi, lo = R(hi, lo, z) # First reduction
|
|
hi, lo = R(hi, lo, z) # Second reduction
|
|
hi, lo = R(hi, lo, z) # Third reduction
|
|
hi * 2**64 + lo < 2 * p3 # True
|
|
|
|
|
|
Step d) preserves congruence and yields a result < p:
|
|
-----------------------------------------------------
|
|
|
|
Case hi = 0:
|
|
|
|
Case lo < p: trivial.
|
|
|
|
Case lo >= p:
|
|
|
|
lo == lo - p (mod p) # result is congruent
|
|
|
|
p <= lo < 2*p -> 0 <= lo - p < p # result is in the correct range
|
|
|
|
Case hi = 1:
|
|
|
|
p < 2**64 /\ 2**64 + lo < 2*p -> lo < p # lo is always less than p
|
|
|
|
2**64 + lo == 2**64 + (lo - p) (mod p) # result is congruent
|
|
|
|
= lo - p # exactly the same value as the previous RHS
|
|
# in uint64_t arithmetic.
|
|
|
|
p < 2**64 + lo < 2*p -> 0 < 2**64 + (lo - p) < p # correct range
|
|
|
|
|
|
|
|
[1] http://www.apfloat.org/apfloat/2.40/apfloat.pdf
|
|
|
|
|
|
|