diff --git a/Misc/ACKS b/Misc/ACKS index cf023c9af92..6df339c3e14 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1860,6 +1860,7 @@ Kurt Vile Norman Vine Pauli Virtanen Frank Visser +Jeremiah Vivian (Pascual) Johannes Vogel Michael Vogt Radu Voicilas diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-01-17-23-12-01.bpo-46407.2_5a7R.rst b/Misc/NEWS.d/next/Core and Builtins/2022-01-17-23-12-01.bpo-46407.2_5a7R.rst new file mode 100644 index 00000000000..e7f555f1ffc --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-01-17-23-12-01.bpo-46407.2_5a7R.rst @@ -0,0 +1 @@ +Optimize some modulo operations in ``Objects/longobject.c``. Patch by Jeremiah Vivian. diff --git a/Objects/longobject.c b/Objects/longobject.c index 5f0cc579c2c..cc4ceec21aa 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -1670,6 +1670,35 @@ divrem1(PyLongObject *a, digit n, digit *prem) return long_normalize(z); } +/* Remainder of long pin, w/ size digits, by non-zero digit n, + returning the remainder. pin points at the LSD. */ + +static digit +inplace_rem1(digit *pin, Py_ssize_t size, digit n) +{ + twodigits rem = 0; + + assert(n > 0 && n <= PyLong_MASK); + while (--size >= 0) + rem = ((rem << PyLong_SHIFT) | pin[size]) % n; + return (digit)rem; +} + +/* Get the remainder of an integer divided by a digit, returning + the remainder as the result of the function. The sign of a is + ignored; n should not be zero. */ + +static PyLongObject * +rem1(PyLongObject *a, digit n) +{ + const Py_ssize_t size = Py_ABS(Py_SIZE(a)); + + assert(n > 0 && n <= PyLong_MASK); + return (PyLongObject *)PyLong_FromLong( + (long)inplace_rem1(a->ob_digit, size, n) + ); +} + /* Convert an integer to a base 10 string. Returns a new non-shared string. (Return value is non-shared so that callers can modify the returned value if necessary.) */ @@ -2689,6 +2718,47 @@ long_divrem(PyLongObject *a, PyLongObject *b, return 0; } +/* Int remainder, top-level routine */ + +static int +long_rem(PyLongObject *a, PyLongObject *b, PyLongObject **prem) +{ + Py_ssize_t size_a = Py_ABS(Py_SIZE(a)), size_b = Py_ABS(Py_SIZE(b)); + + if (size_b == 0) { + PyErr_SetString(PyExc_ZeroDivisionError, + "integer modulo by zero"); + return -1; + } + if (size_a < size_b || + (size_a == size_b && + a->ob_digit[size_a-1] < b->ob_digit[size_b-1])) { + /* |a| < |b|. */ + *prem = (PyLongObject *)long_long((PyObject *)a); + return -(*prem == NULL); + } + if (size_b == 1) { + *prem = rem1(a, b->ob_digit[0]); + if (*prem == NULL) + return -1; + } + else { + /* Slow path using divrem. */ + x_divrem(a, b, prem); + if (*prem == NULL) + return -1; + } + /* Set the sign. */ + if (Py_SIZE(a) < 0 && Py_SIZE(*prem) != 0) { + _PyLong_Negate(prem); + if (*prem == NULL) { + Py_CLEAR(*prem); + return -1; + } + } + return 0; +} + /* Unsigned int division with remainder -- the algorithm. The arguments v1 and w1 should satisfy 2 <= Py_ABS(Py_SIZE(w1)) <= Py_ABS(Py_SIZE(v1)). */ @@ -3814,6 +3884,37 @@ l_divmod(PyLongObject *v, PyLongObject *w, return 0; } +/* Compute + * *pmod = v % w + * pmod cannot be NULL. The caller owns a reference to pmod. + */ +static int +l_mod(PyLongObject *v, PyLongObject *w, PyLongObject **pmod) +{ + PyLongObject *mod; + + assert(pmod); + if (Py_ABS(Py_SIZE(v)) == 1 && Py_ABS(Py_SIZE(w)) == 1) { + /* Fast path for single-digit longs */ + *pmod = (PyLongObject *)fast_mod(v, w); + return -(*pmod == NULL); + } + if (long_rem(v, w, &mod) < 0) + return -1; + if ((Py_SIZE(mod) < 0 && Py_SIZE(w) > 0) || + (Py_SIZE(mod) > 0 && Py_SIZE(w) < 0)) { + PyLongObject *temp; + temp = (PyLongObject *) long_add(mod, w); + Py_DECREF(mod); + mod = temp; + if (mod == NULL) + return -1; + } + *pmod = mod; + + return 0; +} + static PyObject * long_div(PyObject *a, PyObject *b) { @@ -4100,11 +4201,7 @@ long_mod(PyObject *a, PyObject *b) CHECK_BINOP(a, b); - if (Py_ABS(Py_SIZE(a)) == 1 && Py_ABS(Py_SIZE(b)) == 1) { - return fast_mod((PyLongObject*)a, (PyLongObject*)b); - } - - if (l_divmod((PyLongObject*)a, (PyLongObject*)b, NULL, &mod) < 0) + if (l_mod((PyLongObject*)a, (PyLongObject*)b, &mod) < 0) mod = NULL; return (PyObject *)mod; } @@ -4333,10 +4430,10 @@ long_pow(PyObject *v, PyObject *w, PyObject *x) while the "large exponent" case multiplies directly by base 31 times. It can be unboundedly faster to multiply by base % modulus instead. - We could _always_ do this reduction, but l_divmod() isn't cheap, + We could _always_ do this reduction, but l_mod() isn't cheap, so we only do it when it buys something. */ if (Py_SIZE(a) < 0 || Py_SIZE(a) > Py_SIZE(c)) { - if (l_divmod(a, c, NULL, &temp) < 0) + if (l_mod(a, c, &temp) < 0) goto Error; Py_DECREF(a); a = temp; @@ -4357,7 +4454,7 @@ long_pow(PyObject *v, PyObject *w, PyObject *x) #define REDUCE(X) \ do { \ if (c != NULL) { \ - if (l_divmod(X, c, NULL, &temp) < 0) \ + if (l_mod(X, c, &temp) < 0) \ goto Error; \ Py_XDECREF(X); \ X = temp; \ @@ -5022,7 +5119,7 @@ _PyLong_GCD(PyObject *aarg, PyObject *barg) if (k == 0) { /* no progress; do a Euclidean step */ - if (l_divmod(a, b, NULL, &r) < 0) + if (l_mod(a, b, &r) < 0) goto error; Py_DECREF(a); a = b;