From e0b23095ee03a11d09f38cbc689307dc5c93afda Mon Sep 17 00:00:00 2001 From: Yury Selivanov Date: Thu, 11 Feb 2016 10:26:27 -0500 Subject: [PATCH] Issues #26289 and #26315: Optimize floor/modulo div for single-digit longs Microbenchmarks show 2-2.5x improvement. Built-in 'divmod' function is now also ~10% faster. -m timeit -s "x=22331" "x//2;x//-3;x//4;x//5;x//-6;x//7;x//8;x//-99;x//100;" with patch: 0.321 without patch: 0.633 -m timeit -s "x=22331" "x%2;x%3;x%-4;x%5;x%6;x%-7;x%8;x%99;x%-100;" with patch: 0.224 without patch: 0.66 Big thanks to Serhiy Storchaka, Mark Dickinson and Victor Stinner for thorow code reviews and algorithms improvements. --- Lib/test/test_long.py | 33 ++++++++++++++++++ Misc/NEWS | 4 +++ Objects/longobject.c | 79 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 116 insertions(+) diff --git a/Lib/test/test_long.py b/Lib/test/test_long.py index 62e69a9ed78..5b8b4dc4a99 100644 --- a/Lib/test/test_long.py +++ b/Lib/test/test_long.py @@ -689,6 +689,20 @@ class LongTest(unittest.TestCase): self.assertRaises(OverflowError, int, float('-inf')) self.assertRaises(ValueError, int, float('nan')) + def test_mod_division(self): + with self.assertRaises(ZeroDivisionError): + _ = 1 % 0 + + self.assertEqual(13 % 10, 3) + self.assertEqual(-13 % 10, 7) + self.assertEqual(13 % -10, -7) + self.assertEqual(-13 % -10, -3) + + self.assertEqual(12 % 4, 0) + self.assertEqual(-12 % 4, 0) + self.assertEqual(12 % -4, 0) + self.assertEqual(-12 % -4, 0) + def test_true_division(self): huge = 1 << 40000 mhuge = -huge @@ -723,6 +737,25 @@ class LongTest(unittest.TestCase): for zero in ["huge / 0", "mhuge / 0"]: self.assertRaises(ZeroDivisionError, eval, zero, namespace) + def test_floordiv(self): + with self.assertRaises(ZeroDivisionError): + _ = 1 // 0 + + self.assertEqual(2 // 3, 0) + self.assertEqual(2 // -3, -1) + self.assertEqual(-2 // 3, -1) + self.assertEqual(-2 // -3, 0) + + self.assertEqual(-11 // -3, 3) + self.assertEqual(-11 // 3, -4) + self.assertEqual(11 // -3, -4) + self.assertEqual(11 // 3, 3) + + self.assertEqual(-12 // -3, 4) + self.assertEqual(-12 // 3, -4) + self.assertEqual(12 // -3, -4) + self.assertEqual(12 // 3, 4) + def check_truediv(self, a, b, skip_small=True): """Verify that the result of a/b is correctly rounded, by comparing it with a pure Python implementation of correctly diff --git a/Misc/NEWS b/Misc/NEWS index 0114a8670e7..d51c3f31327 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -176,6 +176,10 @@ Core and Builtins - Issue #26288: Optimize PyLong_AsDouble. +- Issues #26289 and #26315: Optimize floor and modulo division for + single-digit longs. Microbenchmarks show 2-2.5x improvement. Built-in + 'divmod' function is now also ~10% faster. + Library ------- diff --git a/Objects/longobject.c b/Objects/longobject.c index f3afb109f76..3f9837fc935 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -3502,6 +3502,52 @@ long_mul(PyLongObject *a, PyLongObject *b) return (PyObject *)z; } +/* Fast modulo division for single-digit longs. */ +static PyObject * +fast_mod(PyLongObject *a, PyLongObject *b) +{ + sdigit left = a->ob_digit[0]; + sdigit right = b->ob_digit[0]; + sdigit mod; + + assert(Py_ABS(Py_SIZE(a)) == 1); + assert(Py_ABS(Py_SIZE(b)) == 1); + + if (Py_SIZE(a) == Py_SIZE(b)) { + /* 'a' and 'b' have the same sign. */ + mod = left % right; + } + else { + /* Either 'a' or 'b' is negative. */ + mod = right - 1 - (left - 1) % right; + } + + return PyLong_FromLong(mod * Py_SIZE(b)); +} + +/* Fast floor division for single-digit longs. */ +static PyObject * +fast_floor_div(PyLongObject *a, PyLongObject *b) +{ + sdigit left = a->ob_digit[0]; + sdigit right = b->ob_digit[0]; + sdigit div; + + assert(Py_ABS(Py_SIZE(a)) == 1); + assert(Py_ABS(Py_SIZE(b)) == 1); + + if (Py_SIZE(a) == Py_SIZE(b)) { + /* 'a' and 'b' have the same sign. */ + div = left / right; + } + else { + /* Either 'a' or 'b' is negative. */ + div = -1 - (left - 1) / right; + } + + return PyLong_FromLong(div); +} + /* The / and % operators are now defined in terms of divmod(). The expression a mod b has the value a - b*floor(a/b). The long_divrem function gives the remainder after division of @@ -3529,6 +3575,30 @@ l_divmod(PyLongObject *v, PyLongObject *w, { PyLongObject *div, *mod; + if (Py_ABS(Py_SIZE(v)) == 1 && Py_ABS(Py_SIZE(w)) == 1) { + /* Fast path for single-digit longs */ + div = NULL; + if (pdiv != NULL) { + div = (PyLongObject *)fast_floor_div(v, w); + if (div == NULL) { + return -1; + } + } + if (pmod != NULL) { + mod = (PyLongObject *)fast_mod(v, w); + if (mod == NULL) { + Py_XDECREF(div); + return -1; + } + *pmod = mod; + } + if (pdiv != NULL) { + /* We only want to set `*pdiv` when `*pmod` is + set successfully. */ + *pdiv = div; + } + return 0; + } if (long_divrem(v, w, &div, &mod) < 0) return -1; if ((Py_SIZE(mod) < 0 && Py_SIZE(w) > 0) || @@ -3573,6 +3643,11 @@ long_div(PyObject *a, PyObject *b) PyLongObject *div; CHECK_BINOP(a, b); + + if (Py_ABS(Py_SIZE(a)) == 1 && Py_ABS(Py_SIZE(b)) == 1) { + return fast_floor_div((PyLongObject*)a, (PyLongObject*)b); + } + if (l_divmod((PyLongObject*)a, (PyLongObject*)b, &div, NULL) < 0) div = NULL; return (PyObject *)div; @@ -3848,6 +3923,10 @@ long_mod(PyObject *a, PyObject *b) CHECK_BINOP(a, b); + if (Py_ABS(Py_SIZE(a)) == 1 && Py_ABS(Py_SIZE(b)) == 1) { + return fast_mod((PyLongObject*)a, (PyLongObject*)b); + } + if (l_divmod((PyLongObject*)a, (PyLongObject*)b, NULL, &mod) < 0) mod = NULL; return (PyObject *)mod;