mirror of https://github.com/python/cpython
Cautious introduction of a patch that started from
SF 560379: Karatsuba multiplication. Lots of things were changed from that. This needs a lot more testing, for correctness and speed, the latter especially when bit lengths are unbalanced. For now, the Karatsuba code gets invoked if and only if envar KARAT exists.
This commit is contained in:
parent
5f7617b5f6
commit
5af4e6c739
|
@ -106,6 +106,7 @@ Greg Couch
|
||||||
Steve Cousins
|
Steve Cousins
|
||||||
Alex Coventry
|
Alex Coventry
|
||||||
Matthew Dixon Cowles
|
Matthew Dixon Cowles
|
||||||
|
Christopher A. Craig
|
||||||
Drew Csillag
|
Drew Csillag
|
||||||
Tom Culliton
|
Tom Culliton
|
||||||
John Cugini
|
John Cugini
|
||||||
|
|
10
Misc/NEWS
10
Misc/NEWS
|
@ -6,6 +6,10 @@ Type/class unification and new-style classes
|
||||||
|
|
||||||
Core and builtins
|
Core and builtins
|
||||||
|
|
||||||
|
- XXX Karatsuba multiplication. This is currently used if and only
|
||||||
|
if envar KARAT exists. It needs more correctness and speed testing,
|
||||||
|
the latter especially with unbalanced bit lengths.
|
||||||
|
|
||||||
- u'%c' will now raise a ValueError in case the argument is an
|
- u'%c' will now raise a ValueError in case the argument is an
|
||||||
integer outside the valid range of Unicode code point ordinals.
|
integer outside the valid range of Unicode code point ordinals.
|
||||||
|
|
||||||
|
@ -66,8 +70,8 @@ Core and builtins
|
||||||
other platforms. KeyboardInterrupt can now reliably be caught,
|
other platforms. KeyboardInterrupt can now reliably be caught,
|
||||||
and Ctrl+C at an interactive prompt no longer terminates the
|
and Ctrl+C at an interactive prompt no longer terminates the
|
||||||
process under NT/2k/XP (it never did under Win9x). Ctrl+C will
|
process under NT/2k/XP (it never did under Win9x). Ctrl+C will
|
||||||
interrupt time.sleep() in the main thread, and any child processes
|
interrupt time.sleep() in the main thread, and any child processes
|
||||||
created via the popen family (on win2k; we can't make win9x work
|
created via the popen family (on win2k; we can't make win9x work
|
||||||
reliably) are also interrupted (as generally happens on Linux/Unix.)
|
reliably) are also interrupted (as generally happens on Linux/Unix.)
|
||||||
[SF bugs 231273, 439992 and 581232]
|
[SF bugs 231273, 439992 and 581232]
|
||||||
|
|
||||||
|
@ -83,7 +87,7 @@ Core and builtins
|
||||||
as directory names.
|
as directory names.
|
||||||
|
|
||||||
- The built-ins slice() and buffer() are now callable types. The
|
- The built-ins slice() and buffer() are now callable types. The
|
||||||
types classobj (formerly class), code, function, instance, and
|
types classobj (formerly class), code, function, instance, and
|
||||||
instancemethod (formerly instance-method), which have no built-in
|
instancemethod (formerly instance-method), which have no built-in
|
||||||
names but are accessible through the types module, are now also
|
names but are accessible through the types module, are now also
|
||||||
callable. The type dict-proxy is renamed to dictproxy.
|
callable. The type dict-proxy is renamed to dictproxy.
|
||||||
|
|
|
@ -8,8 +8,19 @@
|
||||||
|
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
|
|
||||||
|
/* For long multiplication, use the O(N**2) school algorithm unless
|
||||||
|
* both operands contain more than KARATSUBA_CUTOFF digits (this
|
||||||
|
* being an internal Python long digit, in base BASE).
|
||||||
|
*/
|
||||||
|
#define KARATSUBA_CUTOFF 35
|
||||||
|
|
||||||
#define ABS(x) ((x) < 0 ? -(x) : (x))
|
#define ABS(x) ((x) < 0 ? -(x) : (x))
|
||||||
|
|
||||||
|
#undef MIN
|
||||||
|
#undef MAX
|
||||||
|
#define MAX(x, y) ((x) < (y) ? (y) : (x))
|
||||||
|
#define MIN(x, y) ((x) > (y) ? (y) : (x))
|
||||||
|
|
||||||
/* Forward */
|
/* Forward */
|
||||||
static PyLongObject *long_normalize(PyLongObject *);
|
static PyLongObject *long_normalize(PyLongObject *);
|
||||||
static PyLongObject *mul1(PyLongObject *, wdigit);
|
static PyLongObject *mul1(PyLongObject *, wdigit);
|
||||||
|
@ -34,7 +45,7 @@ long_normalize(register PyLongObject *v)
|
||||||
{
|
{
|
||||||
int j = ABS(v->ob_size);
|
int j = ABS(v->ob_size);
|
||||||
register int i = j;
|
register int i = j;
|
||||||
|
|
||||||
while (i > 0 && v->ob_digit[i-1] == 0)
|
while (i > 0 && v->ob_digit[i-1] == 0)
|
||||||
--i;
|
--i;
|
||||||
if (i != j)
|
if (i != j)
|
||||||
|
@ -226,7 +237,7 @@ PyLong_AsUnsignedLong(PyObject *vv)
|
||||||
register PyLongObject *v;
|
register PyLongObject *v;
|
||||||
unsigned long x, prev;
|
unsigned long x, prev;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
if (vv == NULL || !PyLong_Check(vv)) {
|
if (vv == NULL || !PyLong_Check(vv)) {
|
||||||
PyErr_BadInternalCall();
|
PyErr_BadInternalCall();
|
||||||
return (unsigned long) -1;
|
return (unsigned long) -1;
|
||||||
|
@ -492,7 +503,7 @@ _PyLong_AsByteArray(PyLongObject* v,
|
||||||
Overflow:
|
Overflow:
|
||||||
PyErr_SetString(PyExc_OverflowError, "long too big to convert");
|
PyErr_SetString(PyExc_OverflowError, "long too big to convert");
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
double
|
double
|
||||||
|
@ -734,7 +745,7 @@ PyLong_AsUnsignedLongLong(PyObject *vv)
|
||||||
|
|
||||||
static int
|
static int
|
||||||
convert_binop(PyObject *v, PyObject *w, PyLongObject **a, PyLongObject **b) {
|
convert_binop(PyObject *v, PyObject *w, PyLongObject **a, PyLongObject **b) {
|
||||||
if (PyLong_Check(v)) {
|
if (PyLong_Check(v)) {
|
||||||
*a = (PyLongObject *) v;
|
*a = (PyLongObject *) v;
|
||||||
Py_INCREF(v);
|
Py_INCREF(v);
|
||||||
}
|
}
|
||||||
|
@ -744,7 +755,7 @@ convert_binop(PyObject *v, PyObject *w, PyLongObject **a, PyLongObject **b) {
|
||||||
else {
|
else {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
if (PyLong_Check(w)) {
|
if (PyLong_Check(w)) {
|
||||||
*b = (PyLongObject *) w;
|
*b = (PyLongObject *) w;
|
||||||
Py_INCREF(w);
|
Py_INCREF(w);
|
||||||
}
|
}
|
||||||
|
@ -782,7 +793,7 @@ muladd1(PyLongObject *a, wdigit n, wdigit extra)
|
||||||
PyLongObject *z = _PyLong_New(size_a+1);
|
PyLongObject *z = _PyLong_New(size_a+1);
|
||||||
twodigits carry = extra;
|
twodigits carry = extra;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
if (z == NULL)
|
if (z == NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
for (i = 0; i < size_a; ++i) {
|
for (i = 0; i < size_a; ++i) {
|
||||||
|
@ -826,7 +837,7 @@ divrem1(PyLongObject *a, digit n, digit *prem)
|
||||||
{
|
{
|
||||||
const int size = ABS(a->ob_size);
|
const int size = ABS(a->ob_size);
|
||||||
PyLongObject *z;
|
PyLongObject *z;
|
||||||
|
|
||||||
assert(n > 0 && n <= MASK);
|
assert(n > 0 && n <= MASK);
|
||||||
z = _PyLong_New(size);
|
z = _PyLong_New(size);
|
||||||
if (z == NULL)
|
if (z == NULL)
|
||||||
|
@ -855,7 +866,7 @@ long_format(PyObject *aa, int base, int addL)
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
assert(base >= 2 && base <= 36);
|
assert(base >= 2 && base <= 36);
|
||||||
|
|
||||||
/* Compute a rough upper bound for the length of the string */
|
/* Compute a rough upper bound for the length of the string */
|
||||||
i = base;
|
i = base;
|
||||||
bits = 0;
|
bits = 0;
|
||||||
|
@ -873,7 +884,7 @@ long_format(PyObject *aa, int base, int addL)
|
||||||
*--p = 'L';
|
*--p = 'L';
|
||||||
if (a->ob_size < 0)
|
if (a->ob_size < 0)
|
||||||
sign = '-';
|
sign = '-';
|
||||||
|
|
||||||
if (a->ob_size == 0) {
|
if (a->ob_size == 0) {
|
||||||
*--p = '0';
|
*--p = '0';
|
||||||
}
|
}
|
||||||
|
@ -992,7 +1003,7 @@ PyLong_FromString(char *str, char **pend, int base)
|
||||||
int sign = 1;
|
int sign = 1;
|
||||||
char *start, *orig_str = str;
|
char *start, *orig_str = str;
|
||||||
PyLongObject *z;
|
PyLongObject *z;
|
||||||
|
|
||||||
if ((base != 0 && base < 2) || base > 36) {
|
if ((base != 0 && base < 2) || base > 36) {
|
||||||
PyErr_SetString(PyExc_ValueError,
|
PyErr_SetString(PyExc_ValueError,
|
||||||
"long() arg 2 must be >= 2 and <= 36");
|
"long() arg 2 must be >= 2 and <= 36");
|
||||||
|
@ -1023,7 +1034,7 @@ PyLong_FromString(char *str, char **pend, int base)
|
||||||
for ( ; z != NULL; ++str) {
|
for ( ; z != NULL; ++str) {
|
||||||
int k = -1;
|
int k = -1;
|
||||||
PyLongObject *temp;
|
PyLongObject *temp;
|
||||||
|
|
||||||
if (*str <= '9')
|
if (*str <= '9')
|
||||||
k = *str - '0';
|
k = *str - '0';
|
||||||
else if (*str >= 'a')
|
else if (*str >= 'a')
|
||||||
|
@ -1053,7 +1064,7 @@ PyLong_FromString(char *str, char **pend, int base)
|
||||||
return (PyObject *) z;
|
return (PyObject *) z;
|
||||||
|
|
||||||
onError:
|
onError:
|
||||||
PyErr_Format(PyExc_ValueError,
|
PyErr_Format(PyExc_ValueError,
|
||||||
"invalid literal for long(): %.200s", orig_str);
|
"invalid literal for long(): %.200s", orig_str);
|
||||||
Py_XDECREF(z);
|
Py_XDECREF(z);
|
||||||
return NULL;
|
return NULL;
|
||||||
|
@ -1092,7 +1103,7 @@ long_divrem(PyLongObject *a, PyLongObject *b,
|
||||||
{
|
{
|
||||||
int size_a = ABS(a->ob_size), size_b = ABS(b->ob_size);
|
int size_a = ABS(a->ob_size), size_b = ABS(b->ob_size);
|
||||||
PyLongObject *z;
|
PyLongObject *z;
|
||||||
|
|
||||||
if (size_b == 0) {
|
if (size_b == 0) {
|
||||||
PyErr_SetString(PyExc_ZeroDivisionError,
|
PyErr_SetString(PyExc_ZeroDivisionError,
|
||||||
"long division or modulo by zero");
|
"long division or modulo by zero");
|
||||||
|
@ -1142,26 +1153,26 @@ x_divrem(PyLongObject *v1, PyLongObject *w1, PyLongObject **prem)
|
||||||
PyLongObject *w = mul1(w1, d);
|
PyLongObject *w = mul1(w1, d);
|
||||||
PyLongObject *a;
|
PyLongObject *a;
|
||||||
int j, k;
|
int j, k;
|
||||||
|
|
||||||
if (v == NULL || w == NULL) {
|
if (v == NULL || w == NULL) {
|
||||||
Py_XDECREF(v);
|
Py_XDECREF(v);
|
||||||
Py_XDECREF(w);
|
Py_XDECREF(w);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(size_v >= size_w && size_w > 1); /* Assert checks by div() */
|
assert(size_v >= size_w && size_w > 1); /* Assert checks by div() */
|
||||||
assert(v->ob_refcnt == 1); /* Since v will be used as accumulator! */
|
assert(v->ob_refcnt == 1); /* Since v will be used as accumulator! */
|
||||||
assert(size_w == ABS(w->ob_size)); /* That's how d was calculated */
|
assert(size_w == ABS(w->ob_size)); /* That's how d was calculated */
|
||||||
|
|
||||||
size_v = ABS(v->ob_size);
|
size_v = ABS(v->ob_size);
|
||||||
a = _PyLong_New(size_v - size_w + 1);
|
a = _PyLong_New(size_v - size_w + 1);
|
||||||
|
|
||||||
for (j = size_v, k = a->ob_size-1; a != NULL && k >= 0; --j, --k) {
|
for (j = size_v, k = a->ob_size-1; a != NULL && k >= 0; --j, --k) {
|
||||||
digit vj = (j >= size_v) ? 0 : v->ob_digit[j];
|
digit vj = (j >= size_v) ? 0 : v->ob_digit[j];
|
||||||
twodigits q;
|
twodigits q;
|
||||||
stwodigits carry = 0;
|
stwodigits carry = 0;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
SIGCHECK({
|
SIGCHECK({
|
||||||
Py_DECREF(a);
|
Py_DECREF(a);
|
||||||
a = NULL;
|
a = NULL;
|
||||||
|
@ -1172,7 +1183,7 @@ x_divrem(PyLongObject *v1, PyLongObject *w1, PyLongObject **prem)
|
||||||
else
|
else
|
||||||
q = (((twodigits)vj << SHIFT) + v->ob_digit[j-1]) /
|
q = (((twodigits)vj << SHIFT) + v->ob_digit[j-1]) /
|
||||||
w->ob_digit[size_w-1];
|
w->ob_digit[size_w-1];
|
||||||
|
|
||||||
while (w->ob_digit[size_w-2]*q >
|
while (w->ob_digit[size_w-2]*q >
|
||||||
((
|
((
|
||||||
((twodigits)vj << SHIFT)
|
((twodigits)vj << SHIFT)
|
||||||
|
@ -1181,7 +1192,7 @@ x_divrem(PyLongObject *v1, PyLongObject *w1, PyLongObject **prem)
|
||||||
) << SHIFT)
|
) << SHIFT)
|
||||||
+ v->ob_digit[j-2])
|
+ v->ob_digit[j-2])
|
||||||
--q;
|
--q;
|
||||||
|
|
||||||
for (i = 0; i < size_w && i+k < size_v; ++i) {
|
for (i = 0; i < size_w && i+k < size_v; ++i) {
|
||||||
twodigits z = w->ob_digit[i] * q;
|
twodigits z = w->ob_digit[i] * q;
|
||||||
digit zz = (digit) (z >> SHIFT);
|
digit zz = (digit) (z >> SHIFT);
|
||||||
|
@ -1192,12 +1203,12 @@ x_divrem(PyLongObject *v1, PyLongObject *w1, PyLongObject **prem)
|
||||||
carry, SHIFT);
|
carry, SHIFT);
|
||||||
carry -= zz;
|
carry -= zz;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (i+k < size_v) {
|
if (i+k < size_v) {
|
||||||
carry += v->ob_digit[i+k];
|
carry += v->ob_digit[i+k];
|
||||||
v->ob_digit[i+k] = 0;
|
v->ob_digit[i+k] = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (carry == 0)
|
if (carry == 0)
|
||||||
a->ob_digit[k] = (digit) q;
|
a->ob_digit[k] = (digit) q;
|
||||||
else {
|
else {
|
||||||
|
@ -1213,7 +1224,7 @@ x_divrem(PyLongObject *v1, PyLongObject *w1, PyLongObject **prem)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} /* for j, k */
|
} /* for j, k */
|
||||||
|
|
||||||
if (a == NULL)
|
if (a == NULL)
|
||||||
*prem = NULL;
|
*prem = NULL;
|
||||||
else {
|
else {
|
||||||
|
@ -1254,7 +1265,7 @@ static int
|
||||||
long_compare(PyLongObject *a, PyLongObject *b)
|
long_compare(PyLongObject *a, PyLongObject *b)
|
||||||
{
|
{
|
||||||
int sign;
|
int sign;
|
||||||
|
|
||||||
if (a->ob_size != b->ob_size) {
|
if (a->ob_size != b->ob_size) {
|
||||||
if (ABS(a->ob_size) == 0 && ABS(b->ob_size) == 0)
|
if (ABS(a->ob_size) == 0 && ABS(b->ob_size) == 0)
|
||||||
sign = 0;
|
sign = 0;
|
||||||
|
@ -1424,7 +1435,7 @@ static PyObject *
|
||||||
long_sub(PyLongObject *v, PyLongObject *w)
|
long_sub(PyLongObject *v, PyLongObject *w)
|
||||||
{
|
{
|
||||||
PyLongObject *a, *b, *z;
|
PyLongObject *a, *b, *z;
|
||||||
|
|
||||||
CONVERT_BINOP((PyObject *)v, (PyObject *)w, &a, &b);
|
CONVERT_BINOP((PyObject *)v, (PyObject *)w, &a, &b);
|
||||||
|
|
||||||
if (a->ob_size < 0) {
|
if (a->ob_size < 0) {
|
||||||
|
@ -1457,54 +1468,28 @@ long_repeat(PyObject *v, PyLongObject *w)
|
||||||
return (*v->ob_type->tp_as_sequence->sq_repeat)(v, n);
|
return (*v->ob_type->tp_as_sequence->sq_repeat)(v, n);
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyObject *
|
/* Grade school multiplication, ignoring the signs.
|
||||||
long_mul(PyLongObject *v, PyLongObject *w)
|
* Returns the absolute value of the product, or NULL if error.
|
||||||
|
*/
|
||||||
|
static PyLongObject *
|
||||||
|
x_mul(PyLongObject *a, PyLongObject *b)
|
||||||
{
|
{
|
||||||
PyLongObject *a, *b, *z;
|
PyLongObject *z;
|
||||||
int size_a;
|
int size_a = ABS(a->ob_size);
|
||||||
int size_b;
|
int size_b = ABS(b->ob_size);
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
if (!convert_binop((PyObject *)v, (PyObject *)w, &a, &b)) {
|
z = _PyLong_New(size_a + size_b);
|
||||||
if (!PyLong_Check(v) &&
|
if (z == NULL)
|
||||||
v->ob_type->tp_as_sequence &&
|
|
||||||
v->ob_type->tp_as_sequence->sq_repeat)
|
|
||||||
return long_repeat((PyObject *)v, w);
|
|
||||||
if (!PyLong_Check(w) &&
|
|
||||||
w->ob_type->tp_as_sequence &&
|
|
||||||
w->ob_type->tp_as_sequence->sq_repeat)
|
|
||||||
return long_repeat((PyObject *)w, v);
|
|
||||||
Py_INCREF(Py_NotImplemented);
|
|
||||||
return Py_NotImplemented;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_a = ABS(a->ob_size);
|
|
||||||
size_b = ABS(b->ob_size);
|
|
||||||
if (size_a > size_b) {
|
|
||||||
/* we are faster with the small object on the left */
|
|
||||||
int hold_sa = size_a;
|
|
||||||
PyLongObject *hold_a = a;
|
|
||||||
size_a = size_b;
|
|
||||||
size_b = hold_sa;
|
|
||||||
a = b;
|
|
||||||
b = hold_a;
|
|
||||||
}
|
|
||||||
z = _PyLong_New(size_a + size_b);
|
|
||||||
if (z == NULL) {
|
|
||||||
Py_DECREF(a);
|
|
||||||
Py_DECREF(b);
|
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
|
||||||
for (i = 0; i < z->ob_size; ++i)
|
memset(z->ob_digit, 0, z->ob_size * sizeof(digit));
|
||||||
z->ob_digit[i] = 0;
|
|
||||||
for (i = 0; i < size_a; ++i) {
|
for (i = 0; i < size_a; ++i) {
|
||||||
twodigits carry = 0;
|
twodigits carry = 0;
|
||||||
twodigits f = a->ob_digit[i];
|
twodigits f = a->ob_digit[i];
|
||||||
int j;
|
int j;
|
||||||
|
|
||||||
SIGCHECK({
|
SIGCHECK({
|
||||||
Py_DECREF(a);
|
|
||||||
Py_DECREF(b);
|
|
||||||
Py_DECREF(z);
|
Py_DECREF(z);
|
||||||
return NULL;
|
return NULL;
|
||||||
})
|
})
|
||||||
|
@ -1520,6 +1505,205 @@ long_mul(PyLongObject *v, PyLongObject *w)
|
||||||
carry >>= SHIFT;
|
carry >>= SHIFT;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return z;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* A helper for Karatsuba multiplication (k_mul).
|
||||||
|
Takes a long "n" and an integer "size" representing the place to
|
||||||
|
split, and sets low and high such that abs(n) == (high << size) + low,
|
||||||
|
viewing the shift as being by digits. The sign bit is ignored, and
|
||||||
|
the return values are >= 0.
|
||||||
|
Returns 0 on success, -1 on failure.
|
||||||
|
*/
|
||||||
|
static int
|
||||||
|
kmul_split(PyLongObject *n, int size, PyLongObject **high, PyLongObject **low)
|
||||||
|
{
|
||||||
|
PyLongObject *hi, *lo;
|
||||||
|
int size_lo, size_hi;
|
||||||
|
const int size_n = ABS(n->ob_size);
|
||||||
|
|
||||||
|
size_lo = MIN(size_n, size);
|
||||||
|
size_hi = size_n - size_lo;
|
||||||
|
|
||||||
|
if ((hi = _PyLong_New(size_hi)) == NULL)
|
||||||
|
return -1;
|
||||||
|
if ((lo = _PyLong_New(size_lo)) == NULL) {
|
||||||
|
Py_DECREF(hi);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
memcpy(lo->ob_digit, n->ob_digit, size_lo * sizeof(digit));
|
||||||
|
memcpy(hi->ob_digit, n->ob_digit + size_lo, size_hi * sizeof(digit));
|
||||||
|
|
||||||
|
*high = long_normalize(hi);
|
||||||
|
*low = long_normalize(lo);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Karatsuba multiplication. Ignores the input signs, and returns the
|
||||||
|
* absolute value of the product (or NULL if error).
|
||||||
|
* See Knuth Vol. 2 Chapter 4.3.3 (Pp. 294-295).
|
||||||
|
*/
|
||||||
|
static PyLongObject *
|
||||||
|
k_mul(PyLongObject *a, PyLongObject *b)
|
||||||
|
{
|
||||||
|
PyLongObject *ah = NULL;
|
||||||
|
PyLongObject *al = NULL;
|
||||||
|
PyLongObject *bh = NULL;
|
||||||
|
PyLongObject *bl = NULL;
|
||||||
|
PyLongObject *albl = NULL;
|
||||||
|
PyLongObject *ahbh = NULL;
|
||||||
|
PyLongObject *k = NULL;
|
||||||
|
PyLongObject *ret = NULL;
|
||||||
|
PyLongObject *t1, *t2;
|
||||||
|
int shift; /* the number of digits we split off */
|
||||||
|
int i;
|
||||||
|
|
||||||
|
/* (ah*X+al)(bh*X+bl) = ah*bh*X*X + (ah*bl + al*bh)*X + al*bl
|
||||||
|
* Let k = (ah+al)*(bh+bl) = ah*bl + al*bh + ah*bh + al*bl
|
||||||
|
* Then the original product is
|
||||||
|
* ah*bh*X*X + (k - ah*bh - al*bl)*X + al*bl
|
||||||
|
* By picking X to be a power of 2, "*X" is just shifting, and it's
|
||||||
|
* been reduced to 3 multiplies on numbers half the size.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* We want to split based on the larger number; fiddle so that b
|
||||||
|
* is largest.
|
||||||
|
*/
|
||||||
|
if (ABS(a->ob_size) > ABS(b->ob_size)) {
|
||||||
|
t1 = a;
|
||||||
|
a = b;
|
||||||
|
b = t1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Use gradeschool math when either number is too small. */
|
||||||
|
if (ABS(a->ob_size) <= KARATSUBA_CUTOFF)
|
||||||
|
return x_mul(a, b);
|
||||||
|
|
||||||
|
shift = ABS(b->ob_size) >> 1;
|
||||||
|
if (kmul_split(a, shift, &ah, &al) < 0) goto fail;
|
||||||
|
if (kmul_split(b, shift, &bh, &bl) < 0) goto fail;
|
||||||
|
|
||||||
|
if ((ahbh = k_mul(ah, bh)) == NULL) goto fail;
|
||||||
|
assert(ahbh->ob_size >= 0);
|
||||||
|
|
||||||
|
/* Allocate result space, and copy ahbh into the high digits. */
|
||||||
|
ret = _PyLong_New(ahbh->ob_size + 2*shift + 1);
|
||||||
|
if (ret == NULL) goto fail;
|
||||||
|
#ifdef Py_DEBUG
|
||||||
|
/* Fill with trash, to catch reference to uninitialized digits. */
|
||||||
|
memset(ret->ob_digit, 0xDF, ret->ob_size * sizeof(digit));
|
||||||
|
#endif
|
||||||
|
memcpy(ret->ob_digit + 2*shift, ahbh->ob_digit,
|
||||||
|
ahbh->ob_size * sizeof(digit));
|
||||||
|
/* That didn't copy into the most-significant (overflow) digit. */
|
||||||
|
ret->ob_digit[ret->ob_size - 1] = 0;
|
||||||
|
|
||||||
|
/* Compute al*bl, and copy into the low digits. */
|
||||||
|
if ((albl = k_mul(al, bl)) == NULL) goto fail;
|
||||||
|
assert(albl->ob_size >= 0);
|
||||||
|
assert(albl->ob_size <= 2*shift); /* no overlap with high digits */
|
||||||
|
memcpy(ret->ob_digit, albl->ob_digit, albl->ob_size * sizeof(digit));
|
||||||
|
|
||||||
|
/* Zero out remaining digits. */
|
||||||
|
i = 2*shift - albl->ob_size; /* number of uninitialized digits */
|
||||||
|
if (i)
|
||||||
|
memset(ret->ob_digit + albl->ob_size, 0, i * sizeof(digit));
|
||||||
|
|
||||||
|
/* k = (ah+al)(bh+bl) */
|
||||||
|
if ((t1 = x_add(ah, al)) == NULL) goto fail;
|
||||||
|
Py_DECREF(ah);
|
||||||
|
Py_DECREF(al);
|
||||||
|
ah = al = NULL;
|
||||||
|
|
||||||
|
if ((t2 = x_add(bh, bl)) == NULL) {
|
||||||
|
Py_DECREF(t1);
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
Py_DECREF(bh);
|
||||||
|
Py_DECREF(bl);
|
||||||
|
bh = bl = NULL;
|
||||||
|
|
||||||
|
k = k_mul(t1, t2);
|
||||||
|
Py_DECREF(t1);
|
||||||
|
Py_DECREF(t2);
|
||||||
|
if (k == NULL) goto fail;
|
||||||
|
|
||||||
|
/* Subtract ahbh and albl from k. Note that this can't become
|
||||||
|
* negative, since k = ahbh + albl + other stuff.
|
||||||
|
*/
|
||||||
|
if ((t1 = x_sub(k, ahbh)) == NULL) goto fail;
|
||||||
|
Py_DECREF(k);
|
||||||
|
k = t1;
|
||||||
|
Py_DECREF(ahbh);
|
||||||
|
ahbh = NULL;
|
||||||
|
|
||||||
|
if ((t1 = x_sub(k, albl)) == NULL) goto fail;
|
||||||
|
Py_DECREF(k);
|
||||||
|
k = t1;
|
||||||
|
Py_DECREF(albl);
|
||||||
|
albl = NULL;
|
||||||
|
|
||||||
|
/* Add k into the result, at the shift-th least-significant digit. */
|
||||||
|
{
|
||||||
|
int j; /* index into k */
|
||||||
|
digit carry = 0;
|
||||||
|
|
||||||
|
for (i = shift, j = 0; j < k->ob_size; ++i, ++j) {
|
||||||
|
carry += ret->ob_digit[i] + k->ob_digit[j];
|
||||||
|
ret->ob_digit[i] = carry & MASK;
|
||||||
|
carry >>= SHIFT;
|
||||||
|
}
|
||||||
|
for (; carry && i < ret->ob_size; ++i) {
|
||||||
|
carry += ret->ob_digit[i];
|
||||||
|
ret->ob_digit[i] = carry & MASK;
|
||||||
|
carry >>= SHIFT;
|
||||||
|
}
|
||||||
|
assert(carry == 0);
|
||||||
|
}
|
||||||
|
Py_DECREF(k);
|
||||||
|
return long_normalize(ret);
|
||||||
|
|
||||||
|
fail:
|
||||||
|
Py_XDECREF(ret);
|
||||||
|
Py_XDECREF(ah);
|
||||||
|
Py_XDECREF(al);
|
||||||
|
Py_XDECREF(bh);
|
||||||
|
Py_XDECREF(bl);
|
||||||
|
Py_XDECREF(ahbh);
|
||||||
|
Py_XDECREF(albl);
|
||||||
|
Py_XDECREF(k);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
long_mul(PyLongObject *v, PyLongObject *w)
|
||||||
|
{
|
||||||
|
PyLongObject *a, *b, *z;
|
||||||
|
|
||||||
|
if (!convert_binop((PyObject *)v, (PyObject *)w, &a, &b)) {
|
||||||
|
if (!PyLong_Check(v) &&
|
||||||
|
v->ob_type->tp_as_sequence &&
|
||||||
|
v->ob_type->tp_as_sequence->sq_repeat)
|
||||||
|
return long_repeat((PyObject *)v, w);
|
||||||
|
if (!PyLong_Check(w) &&
|
||||||
|
w->ob_type->tp_as_sequence &&
|
||||||
|
w->ob_type->tp_as_sequence->sq_repeat)
|
||||||
|
return long_repeat((PyObject *)w, v);
|
||||||
|
Py_INCREF(Py_NotImplemented);
|
||||||
|
return Py_NotImplemented;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (Py_GETENV("KARAT") != NULL)
|
||||||
|
z = k_mul(a, b);
|
||||||
|
else
|
||||||
|
z = x_mul(a, b);
|
||||||
|
if(z == NULL) {
|
||||||
|
Py_DECREF(a);
|
||||||
|
Py_DECREF(b);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
if (a->ob_size < 0)
|
if (a->ob_size < 0)
|
||||||
z->ob_size = -(z->ob_size);
|
z->ob_size = -(z->ob_size);
|
||||||
if (b->ob_size < 0)
|
if (b->ob_size < 0)
|
||||||
|
@ -1545,11 +1729,11 @@ long_mul(PyLongObject *v, PyLongObject *w)
|
||||||
part of the outcome to keep the invariant intact. */
|
part of the outcome to keep the invariant intact. */
|
||||||
|
|
||||||
static int
|
static int
|
||||||
l_divmod(PyLongObject *v, PyLongObject *w,
|
l_divmod(PyLongObject *v, PyLongObject *w,
|
||||||
PyLongObject **pdiv, PyLongObject **pmod)
|
PyLongObject **pdiv, PyLongObject **pmod)
|
||||||
{
|
{
|
||||||
PyLongObject *div, *mod;
|
PyLongObject *div, *mod;
|
||||||
|
|
||||||
if (long_divrem(v, w, &div, &mod) < 0)
|
if (long_divrem(v, w, &div, &mod) < 0)
|
||||||
return -1;
|
return -1;
|
||||||
if ((mod->ob_size < 0 && w->ob_size > 0) ||
|
if ((mod->ob_size < 0 && w->ob_size > 0) ||
|
||||||
|
@ -1657,7 +1841,7 @@ overflow:
|
||||||
PyErr_SetString(PyExc_OverflowError,
|
PyErr_SetString(PyExc_OverflowError,
|
||||||
"long/long too large for a float");
|
"long/long too large for a float");
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
|
@ -1714,7 +1898,7 @@ long_pow(PyObject *v, PyObject *w, PyObject *x)
|
||||||
int size_b, i;
|
int size_b, i;
|
||||||
|
|
||||||
CONVERT_BINOP(v, w, &a, &b);
|
CONVERT_BINOP(v, w, &a, &b);
|
||||||
if (PyLong_Check(x) || Py_None == x) {
|
if (PyLong_Check(x) || Py_None == x) {
|
||||||
c = x;
|
c = x;
|
||||||
Py_INCREF(x);
|
Py_INCREF(x);
|
||||||
}
|
}
|
||||||
|
@ -1754,10 +1938,10 @@ long_pow(PyObject *v, PyObject *w, PyObject *x)
|
||||||
for (i = 0; i < size_b; ++i) {
|
for (i = 0; i < size_b; ++i) {
|
||||||
digit bi = b->ob_digit[i];
|
digit bi = b->ob_digit[i];
|
||||||
int j;
|
int j;
|
||||||
|
|
||||||
for (j = 0; j < SHIFT; ++j) {
|
for (j = 0; j < SHIFT; ++j) {
|
||||||
PyLongObject *temp;
|
PyLongObject *temp;
|
||||||
|
|
||||||
if (bi & 1) {
|
if (bi & 1) {
|
||||||
temp = (PyLongObject *)long_mul(z, a);
|
temp = (PyLongObject *)long_mul(z, a);
|
||||||
Py_DECREF(z);
|
Py_DECREF(z);
|
||||||
|
@ -1886,7 +2070,7 @@ long_rshift(PyLongObject *v, PyLongObject *w)
|
||||||
long shiftby;
|
long shiftby;
|
||||||
int newsize, wordshift, loshift, hishift, i, j;
|
int newsize, wordshift, loshift, hishift, i, j;
|
||||||
digit lomask, himask;
|
digit lomask, himask;
|
||||||
|
|
||||||
CONVERT_BINOP((PyObject *)v, (PyObject *)w, &a, &b);
|
CONVERT_BINOP((PyObject *)v, (PyObject *)w, &a, &b);
|
||||||
|
|
||||||
if (a->ob_size < 0) {
|
if (a->ob_size < 0) {
|
||||||
|
@ -1903,7 +2087,7 @@ long_rshift(PyLongObject *v, PyLongObject *w)
|
||||||
Py_DECREF(a2);
|
Py_DECREF(a2);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
|
||||||
shiftby = PyLong_AsLong((PyObject *)b);
|
shiftby = PyLong_AsLong((PyObject *)b);
|
||||||
if (shiftby == -1L && PyErr_Occurred())
|
if (shiftby == -1L && PyErr_Occurred())
|
||||||
goto rshift_error;
|
goto rshift_error;
|
||||||
|
@ -1953,7 +2137,7 @@ long_lshift(PyObject *v, PyObject *w)
|
||||||
long shiftby;
|
long shiftby;
|
||||||
int oldsize, newsize, wordshift, remshift, i, j;
|
int oldsize, newsize, wordshift, remshift, i, j;
|
||||||
twodigits accum;
|
twodigits accum;
|
||||||
|
|
||||||
CONVERT_BINOP(v, w, &a, &b);
|
CONVERT_BINOP(v, w, &a, &b);
|
||||||
|
|
||||||
shiftby = PyLong_AsLong((PyObject *)b);
|
shiftby = PyLong_AsLong((PyObject *)b);
|
||||||
|
@ -1983,7 +2167,7 @@ long_lshift(PyObject *v, PyObject *w)
|
||||||
z->ob_size = -(z->ob_size);
|
z->ob_size = -(z->ob_size);
|
||||||
for (i = 0; i < wordshift; i++)
|
for (i = 0; i < wordshift; i++)
|
||||||
z->ob_digit[i] = 0;
|
z->ob_digit[i] = 0;
|
||||||
accum = 0;
|
accum = 0;
|
||||||
for (i = wordshift, j = 0; j < oldsize; i++, j++) {
|
for (i = wordshift, j = 0; j < oldsize; i++, j++) {
|
||||||
accum |= a->ob_digit[j] << remshift;
|
accum |= a->ob_digit[j] << remshift;
|
||||||
z->ob_digit[i] = (digit)(accum & MASK);
|
z->ob_digit[i] = (digit)(accum & MASK);
|
||||||
|
@ -1991,7 +2175,7 @@ long_lshift(PyObject *v, PyObject *w)
|
||||||
}
|
}
|
||||||
if (remshift)
|
if (remshift)
|
||||||
z->ob_digit[newsize-1] = (digit)accum;
|
z->ob_digit[newsize-1] = (digit)accum;
|
||||||
else
|
else
|
||||||
assert(!accum);
|
assert(!accum);
|
||||||
z = long_normalize(z);
|
z = long_normalize(z);
|
||||||
lshift_error:
|
lshift_error:
|
||||||
|
@ -2003,11 +2187,6 @@ lshift_error:
|
||||||
|
|
||||||
/* Bitwise and/xor/or operations */
|
/* Bitwise and/xor/or operations */
|
||||||
|
|
||||||
#undef MIN
|
|
||||||
#undef MAX
|
|
||||||
#define MAX(x, y) ((x) < (y) ? (y) : (x))
|
|
||||||
#define MIN(x, y) ((x) > (y) ? (y) : (x))
|
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
long_bitwise(PyLongObject *a,
|
long_bitwise(PyLongObject *a,
|
||||||
int op, /* '&', '|', '^' */
|
int op, /* '&', '|', '^' */
|
||||||
|
@ -2020,7 +2199,7 @@ long_bitwise(PyLongObject *a,
|
||||||
int i;
|
int i;
|
||||||
digit diga, digb;
|
digit diga, digb;
|
||||||
PyObject *v;
|
PyObject *v;
|
||||||
|
|
||||||
if (a->ob_size < 0) {
|
if (a->ob_size < 0) {
|
||||||
a = (PyLongObject *) long_invert(a);
|
a = (PyLongObject *) long_invert(a);
|
||||||
maska = MASK;
|
maska = MASK;
|
||||||
|
@ -2037,7 +2216,7 @@ long_bitwise(PyLongObject *a,
|
||||||
Py_INCREF(b);
|
Py_INCREF(b);
|
||||||
maskb = 0;
|
maskb = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
negz = 0;
|
negz = 0;
|
||||||
switch (op) {
|
switch (op) {
|
||||||
case '^':
|
case '^':
|
||||||
|
@ -2063,7 +2242,7 @@ long_bitwise(PyLongObject *a,
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* JRH: The original logic here was to allocate the result value (z)
|
/* JRH: The original logic here was to allocate the result value (z)
|
||||||
as the longer of the two operands. However, there are some cases
|
as the longer of the two operands. However, there are some cases
|
||||||
where the result is guaranteed to be shorter than that: AND of two
|
where the result is guaranteed to be shorter than that: AND of two
|
||||||
|
@ -2088,7 +2267,7 @@ long_bitwise(PyLongObject *a,
|
||||||
Py_XDECREF(z);
|
Py_XDECREF(z);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (i = 0; i < size_z; ++i) {
|
for (i = 0; i < size_z; ++i) {
|
||||||
diga = (i < size_a ? a->ob_digit[i] : 0) ^ maska;
|
diga = (i < size_a ? a->ob_digit[i] : 0) ^ maska;
|
||||||
digb = (i < size_b ? b->ob_digit[i] : 0) ^ maskb;
|
digb = (i < size_b ? b->ob_digit[i] : 0) ^ maskb;
|
||||||
|
@ -2098,7 +2277,7 @@ long_bitwise(PyLongObject *a,
|
||||||
case '^': z->ob_digit[i] = diga ^ digb; break;
|
case '^': z->ob_digit[i] = diga ^ digb; break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Py_DECREF(a);
|
Py_DECREF(a);
|
||||||
Py_DECREF(b);
|
Py_DECREF(b);
|
||||||
z = long_normalize(z);
|
z = long_normalize(z);
|
||||||
|
|
Loading…
Reference in New Issue