Issue #7652: Clean up _mpd_qinvroot() and mark it LIBMPDEC_ONLY. Use the

algorithm from decimal.py for mpd_qsqrt().
This commit is contained in:
Stefan Krah 2012-07-12 21:17:59 +02:00
parent c128167318
commit e574402bd6
1 changed files with 144 additions and 139 deletions

View File

@ -4298,7 +4298,7 @@ static const mpd_t _mpd_ln10 = {
/* /*
* Set 'result' to log(10). * Set 'result' to log(10).
* Ulp error: abs(result - log(10)) < ulp(log(10)) * Ulp error: abs(result - log(10)) < ulp(log(10))
* Relative error : abs(result - log(10)) < 5 * 10**-prec * log(10) * Relative error: abs(result - log(10)) < 5 * 10**-prec * log(10)
* *
* NOTE: The relative error is not derived from the ulp error, but * NOTE: The relative error is not derived from the ulp error, but
* calculated separately using the fact that 23/10 < log(10) < 24/10. * calculated separately using the fact that 23/10 < log(10) < 24/10.
@ -7204,6 +7204,19 @@ nanresult:
mpd_setspecial(r, MPD_POS, MPD_NAN); mpd_setspecial(r, MPD_POS, MPD_NAN);
} }
/* LIBMPDEC_ONLY */
/*
* Schedule the optimal precision increase for the Newton iteration.
* v := input operand
* z_0 := initial approximation
* initprec := natural number such that abs(sqrt(v) - z_0) < 10**-initprec
* maxprec := target precision
*
* For convenience the output klist contains the elements in reverse order:
* klist := [k_n-1, ..., k_0], where
* 1) k_0 <= initprec and
* 2) abs(sqrt(v) - result) < 10**(-2*k_n-1 + 2) <= 10**-maxprec.
*/
static inline int static inline int
invroot_schedule_prec(mpd_ssize_t klist[MPD_MAX_PREC_LOG2], invroot_schedule_prec(mpd_ssize_t klist[MPD_MAX_PREC_LOG2],
mpd_ssize_t maxprec, mpd_ssize_t initprec) mpd_ssize_t maxprec, mpd_ssize_t initprec)
@ -7224,29 +7237,27 @@ invroot_schedule_prec(mpd_ssize_t klist[MPD_MAX_PREC_LOG2],
} }
/* /*
* Initial approximation for the inverse square root. * Initial approximation for the inverse square root function.
*
* Input: * Input:
* v := 7 or 8 decimal digits with an implicit exponent of 10**-6, * v := rational number, with 1 <= v < 100
* representing a number 1 <= x < 100. * vhat := floor(v * 10**6)
*
* Output: * Output:
* An approximation to 1/sqrt(v) * z := approximation to 1/sqrt(v), such that abs(z - 1/sqrt(v)) < 10**-3.
*/ */
static inline void static inline void
_invroot_init_approx(mpd_t *z, mpd_uint_t v) _invroot_init_approx(mpd_t *z, mpd_uint_t vhat)
{ {
mpd_uint_t lo = 1000; mpd_uint_t lo = 1000;
mpd_uint_t hi = 10000; mpd_uint_t hi = 10000;
mpd_uint_t a, sq; mpd_uint_t a, sq;
assert(v >= lo*lo && v < (hi+1)*(hi+1)); assert(lo*lo <= vhat && vhat < (hi+1)*(hi+1));
for(;;) { for(;;) {
a = (lo + hi) / 2; a = (lo + hi) / 2;
sq = a * a; sq = a * a;
if (v >= sq) { if (vhat >= sq) {
if (v < sq + 2*a + 1) { if (vhat < sq + 2*a + 1) {
break; break;
} }
lo = a + 1; lo = a + 1;
@ -7256,7 +7267,19 @@ _invroot_init_approx(mpd_t *z, mpd_uint_t v)
} }
} }
/* At this point a/1000 is an approximation to sqrt(v). */ /*
* After the binary search we have:
* 1) a**2 <= floor(v * 10**6) < (a + 1)**2
* This implies:
* 2) a**2 <= v * 10**6 < (a + 1)**2
* 3) a <= sqrt(v) * 10**3 < a + 1
* Since 10**3 <= a:
* 4) 0 <= 10**prec/a - 1/sqrt(v) < 10**-prec
* We have:
* 5) 10**3/a - 10**-3 < floor(10**9/a) * 10**-6 <= 10**3/a
* Merging 4) and 5):
* 6) abs(floor(10**9/a) * 10**-6 - 1/sqrt(v)) < 10**-3
*/
mpd_minalloc(z); mpd_minalloc(z);
mpd_clear_flags(z); mpd_clear_flags(z);
z->data[0] = 1000000000UL / a; z->data[0] = 1000000000UL / a;
@ -7265,6 +7288,10 @@ _invroot_init_approx(mpd_t *z, mpd_uint_t v)
mpd_setdigits(z); mpd_setdigits(z);
} }
/*
* Set 'result' to 1/sqrt(a).
* Relative error: abs(result - 1/sqrt(a)) < 10**-prec * 1/sqrt(a)
*/
static void static void
_mpd_qinvroot(mpd_t *result, const mpd_t *a, const mpd_context_t *ctx, _mpd_qinvroot(mpd_t *result, const mpd_t *a, const mpd_context_t *ctx,
uint32_t *status) uint32_t *status)
@ -7282,7 +7309,7 @@ _mpd_qinvroot(mpd_t *result, const mpd_t *a, const mpd_context_t *ctx,
mpd_ssize_t ideal_exp, shift; mpd_ssize_t ideal_exp, shift;
mpd_ssize_t adj, tz; mpd_ssize_t adj, tz;
mpd_ssize_t maxprec, fracdigits; mpd_ssize_t maxprec, fracdigits;
mpd_uint_t x, dummy; mpd_uint_t vhat, dummy;
int i, n; int i, n;
@ -7301,30 +7328,33 @@ _mpd_qinvroot(mpd_t *result, const mpd_t *a, const mpd_context_t *ctx,
fracdigits = v->digits - 1; fracdigits = v->digits - 1;
v->exp = -fracdigits; v->exp = -fracdigits;
n = (v->digits > 7) ? 7 : (int)v->digits; n = (v->digits > 7) ? 7 : (int)v->digits;
_mpd_get_msdigits(&dummy, &x, v, n); /* Let vhat := floor(v * 10**(2*initprec)) */
_mpd_get_msdigits(&dummy, &vhat, v, n);
if (n < 7) { if (n < 7) {
x *= mpd_pow10[7-n]; vhat *= mpd_pow10[7-n];
} }
} }
else { else {
fracdigits = v->digits - 2; fracdigits = v->digits - 2;
v->exp = -fracdigits; v->exp = -fracdigits;
n = (v->digits > 8) ? 8 : (int)v->digits; n = (v->digits > 8) ? 8 : (int)v->digits;
_mpd_get_msdigits(&dummy, &x, v, n); /* Let vhat := floor(v * 10**(2*initprec)) */
_mpd_get_msdigits(&dummy, &vhat, v, n);
if (n < 8) { if (n < 8) {
x *= mpd_pow10[8-n]; vhat *= mpd_pow10[8-n];
} }
} }
adj = (a->exp-v->exp) / 2; adj = (a->exp-v->exp) / 2;
/* initial approximation */ /* initial approximation */
_invroot_init_approx(z, x); _invroot_init_approx(z, vhat);
mpd_maxcontext(&maxcontext); mpd_maxcontext(&maxcontext);
mpd_maxcontext(&varcontext); mpd_maxcontext(&varcontext);
varcontext.round = MPD_ROUND_TRUNC; varcontext.round = MPD_ROUND_TRUNC;
maxprec = ctx->prec + 2; maxprec = ctx->prec + 1;
/* initprec == 3 */
i = invroot_schedule_prec(klist, maxprec, 3); i = invroot_schedule_prec(klist, maxprec, 3);
for (; i >= 0; i--) { for (; i >= 0; i--) {
varcontext.prec = 2*klist[i]+2; varcontext.prec = 2*klist[i]+2;
@ -7358,15 +7388,14 @@ _mpd_qinvroot(mpd_t *result, const mpd_t *a, const mpd_context_t *ctx,
mpd_del(&t); mpd_del(&t);
if (v != &vtmp) mpd_del(v); if (v != &vtmp) mpd_del(v);
*status |= (workstatus&MPD_Errors); *status |= (workstatus&MPD_Errors);
varcontext = *ctx; *status |= (MPD_Rounded|MPD_Inexact);
varcontext.round = MPD_ROUND_HALF_EVEN;
mpd_qfinalize(result, &varcontext, status);
} }
void void
mpd_qinvroot(mpd_t *result, const mpd_t *a, const mpd_context_t *ctx, mpd_qinvroot(mpd_t *result, const mpd_t *a, const mpd_context_t *ctx,
uint32_t *status) uint32_t *status)
{ {
mpd_context_t workctx;
if (mpd_isspecial(a)) { if (mpd_isspecial(a)) {
if (mpd_qcheck_nan(result, a, ctx, status)) { if (mpd_qcheck_nan(result, a, ctx, status)) {
@ -7391,75 +7420,29 @@ mpd_qinvroot(mpd_t *result, const mpd_t *a, const mpd_context_t *ctx,
return; return;
} }
_mpd_qinvroot(result, a, ctx, status); workctx = *ctx;
} workctx.prec += 2;
workctx.round = MPD_ROUND_HALF_EVEN;
/* _mpd_qinvroot(result, a, &workctx, status);
* Ensure correct rounding. Algorithm after Hull & Abrham, "Properly Rounded mpd_qfinalize(result, ctx, status);
* Variable Precision Square Root", ACM Transactions on Mathematical Software,
* Vol. 11, No. 3.
*/
static void
_mpd_fix_sqrt(mpd_t *result, const mpd_t *a, mpd_t *tmp,
const mpd_context_t *ctx, uint32_t *status)
{
mpd_context_t maxctx;
MPD_NEW_CONST(u,0,0,1,1,1,5);
mpd_maxcontext(&maxctx);
u.exp = u.digits - ctx->prec + result->exp - 1;
_mpd_qsub(tmp, result, &u, &maxctx, status);
if (*status&MPD_Errors) goto nanresult;
_mpd_qmul(tmp, tmp, tmp, &maxctx, status);
if (*status&MPD_Errors) goto nanresult;
if (_mpd_cmp(tmp, a) == 1) {
u.exp += 1;
u.data[0] = 1;
_mpd_qsub(result, result, &u, &maxctx, status);
}
else {
_mpd_qadd(tmp, result, &u, &maxctx, status);
if (*status&MPD_Errors) goto nanresult;
_mpd_qmul(tmp, tmp, tmp, &maxctx, status);
if (*status&MPD_Errors) goto nanresult;
if (_mpd_cmp(tmp, a) == -1) {
u.exp += 1;
u.data[0] = 1;
_mpd_qadd(result, result, &u, &maxctx, status);
}
}
return;
nanresult:
mpd_setspecial(result, MPD_POS, MPD_NAN);
} }
/* END LIBMPDEC_ONLY */
/* Algorithm from decimal.py */
void void
mpd_qsqrt(mpd_t *result, const mpd_t *a, const mpd_context_t *ctx, mpd_qsqrt(mpd_t *result, const mpd_t *a, const mpd_context_t *ctx,
uint32_t *status) uint32_t *status)
{ {
uint32_t workstatus = 0; mpd_context_t maxcontext;
mpd_context_t varcontext; MPD_NEW_STATIC(c,0,0,0,0);
mpd_t *z = result; /* current approximation */ MPD_NEW_STATIC(q,0,0,0,0);
MPD_NEW_STATIC(v,0,0,0,0); /* a, normalized to a number between 1 and 10 */ MPD_NEW_STATIC(r,0,0,0,0);
MPD_NEW_STATIC(vtmp,0,0,0,0); MPD_NEW_CONST(two,0,0,1,1,1,2);
MPD_NEW_STATIC(tmp,0,0,0,0); mpd_ssize_t prec, ideal_exp;
mpd_ssize_t ideal_exp, shift; mpd_ssize_t l, shift;
mpd_ssize_t target_prec, fracdigits;
mpd_ssize_t a_exp, a_digits;
mpd_ssize_t adj, tz;
mpd_uint_t dummy, t;
int exact = 0; int exact = 0;
varcontext = *ctx;
varcontext.round = MPD_ROUND_HALF_EVEN;
ideal_exp = (a->exp - (a->exp & 1)) / 2; ideal_exp = (a->exp - (a->exp & 1)) / 2;
if (mpd_isspecial(a)) { if (mpd_isspecial(a)) {
@ -7483,79 +7466,101 @@ mpd_qsqrt(mpd_t *result, const mpd_t *a, const mpd_context_t *ctx,
return; return;
} }
if (!mpd_qcopy(&v, a, status)) { mpd_maxcontext(&maxcontext);
mpd_seterror(result, MPD_Malloc_error, status); prec = ctx->prec + 1;
goto finish;
if (!mpd_qcopy(&c, a, status)) {
goto malloc_error;
} }
c.exp = 0;
a_exp = a->exp; if (a->exp & 1) {
a_digits = a->digits; if (!mpd_qshiftl(&c, &c, 1, status)) {
goto malloc_error;
/* normalize a to 1 <= v < 100 */ }
if ((v.digits+v.exp) & 1) { l = (a->digits >> 1) + 1;
fracdigits = v.digits - 1;
v.exp = -fracdigits;
_mpd_get_msdigits(&dummy, &t, &v, 3);
t = t < 100 ? t*10 : t;
t = t < 100 ? t*10 : t;
} }
else { else {
fracdigits = v.digits - 2; l = (a->digits + 1) >> 1;
v.exp = -fracdigits;
_mpd_get_msdigits(&dummy, &t, &v, 4);
t = t < 1000 ? t*10 : t;
t = t < 1000 ? t*10 : t;
t = t < 1000 ? t*10 : t;
} }
adj = (a_exp-v.exp) / 2;
shift = prec - l;
/* use excess digits */ if (shift >= 0) {
target_prec = (a_digits > ctx->prec) ? a_digits : ctx->prec; if (!mpd_qshiftl(&c, &c, 2*shift, status)) {
target_prec += 2; goto malloc_error;
varcontext.prec = target_prec + 3;
/* invroot is much faster for large numbers */
_mpd_qinvroot(&tmp, &v, &varcontext, &workstatus);
varcontext.prec = target_prec;
_mpd_qdiv(NO_IDEAL_EXP, z, &one, &tmp, &varcontext, &workstatus);
tz = mpd_trail_zeros(result);
if ((result->digits-tz)*2-1 <= v.digits) {
_mpd_qmul(&tmp, result, result, &varcontext, &workstatus);
if (workstatus&MPD_Errors) {
mpd_seterror(result, workstatus&MPD_Errors, status);
goto finish;
} }
exact = (_mpd_cmp(&tmp, &v) == 0); exact = 1;
} }
*status |= (workstatus&MPD_Errors); else {
exact = !mpd_qshiftr_inplace(&c, -2*shift);
if (!exact && !mpd_isspecial(result) && !mpd_iszero(result)) { }
_mpd_fix_sqrt(result, &v, &tmp, &varcontext, status);
if (mpd_isspecial(result)) goto finish; ideal_exp -= shift;
*status |= (MPD_Rounded|MPD_Inexact);
/* find result = floor(sqrt(c)) using Newton's method */
if (!mpd_qshiftl(result, &one, prec, status)) {
goto malloc_error;
}
while (1) {
_mpd_qdivmod(&q, &r, &c, result, &maxcontext, &maxcontext.status);
if (mpd_isspecial(result) || mpd_isspecial(&q)) {
mpd_seterror(result, maxcontext.status&MPD_Errors, status);
goto out;
}
if (_mpd_cmp(result, &q) <= 0) {
break;
}
_mpd_qadd_exact(result, result, &q, &maxcontext, &maxcontext.status);
if (mpd_isspecial(result)) {
mpd_seterror(result, maxcontext.status&MPD_Errors, status);
goto out;
}
_mpd_qdivmod(result, &r, result, &two, &maxcontext, &maxcontext.status);
} }
result->exp += adj;
if (exact) { if (exact) {
shift = ideal_exp - result->exp; _mpd_qmul_exact(&r, result, result, &maxcontext, &maxcontext.status);
shift = (tz > shift) ? shift : tz; if (mpd_isspecial(&r)) {
if (shift > 0) { mpd_seterror(result, maxcontext.status&MPD_Errors, status);
goto out;
}
exact = (_mpd_cmp(&r, &c) == 0);
}
if (exact) {
if (shift >= 0) {
mpd_qshiftr_inplace(result, shift); mpd_qshiftr_inplace(result, shift);
result->exp += shift; }
else {
if (!mpd_qshiftl(result, result, -shift, status)) {
goto malloc_error;
}
}
ideal_exp += shift;
}
else {
int lsd = mpd_lsd(result->data[0]);
if (lsd == 0 || lsd == 5) {
result->data[0] += 1;
} }
} }
result->exp = ideal_exp;
finish:
mpd_del(&v); out:
mpd_del(&vtmp); mpd_del(&c);
mpd_del(&tmp); mpd_del(&q);
varcontext.prec = ctx->prec; mpd_del(&r);
mpd_qfinalize(result, &varcontext, status); maxcontext = *ctx;
maxcontext.round = MPD_ROUND_HALF_EVEN;
mpd_qfinalize(result, &maxcontext, status);
return;
malloc_error:
mpd_seterror(result, MPD_Malloc_error, status);
goto out;
} }