SF bug #812202: randint is always even

* Added C coded getrandbits(k) method that runs in linear time. * Call the new method from randrange() for ranges >= 2**53. * Adds a warning for generators not defining getrandbits() whenever they have a call to randrange() with too large of a population.
2003-10-05 09:09:15 +00:00 · 2003-10-05 09:09:15 +00:00 · 2f726e9093
parent 5c68ef04b7
commit 2f726e9093
5 changed files with 196 additions and 10 deletions
--- a/Doc/lib/librandom.tex
+++ b/Doc/lib/librandom.tex
@ -41,6 +41,10 @@ Class \class{Random} can also be subclassed if you want to use a
 different basic generator of your own devising: in that case, override
 the \method{random()}, \method{seed()}, \method{getstate()},
 \method{setstate()} and \method{jumpahead()} methods.
+Optionally, a new generator can supply a \method{getrandombits()}
+method --- this allows \method{randrange()} to produce selections
+over an arbitrarily large range.
+\versionadded[the \method{getrandombits()} method]{2.4}

 As an example of subclassing, the \module{random} module provides
 the \class{WichmannHill} class which implements an alternative generator
@ -92,6 +96,14 @@ Bookkeeping functions:
  separated by many steps.]{2.3}
 \end{funcdesc}

+\begin{funcdesc}{getrandbits}{k}
+  Returns a python \class{long} int with \var{k} random bits.
+  This method is supplied with the MersenneTwister generator and some
+  other generators may also provide it as an optional part of the API.
+  When available, \method{getrandbits()} enables \method{randrange()}
+  to handle arbitrarily large ranges.
+  \versionadded{2.4}   
+\end{funcdesc} 

 Functions for integers:

--- a/Lib/random.py
+++ b/Lib/random.py
@ -39,6 +39,8 @@ General notes on the underlying Mersenne Twister core generator:

 """

+from warnings import warn as _warn
+from types import MethodType as _MethodType, BuiltinMethodType as _BuiltinMethodType
 from math import log as _log, exp as _exp, pi as _pi, e as _e
 from math import sqrt as _sqrt, acos as _acos, cos as _cos, sin as _sin
 from math import floor as _floor
@ -47,12 +49,14 @@ __all__ = ["Random","seed","random","uniform","randint","choice","sample",
           "randrange","shuffle","normalvariate","lognormvariate",
           "expovariate","vonmisesvariate","gammavariate",
           "gauss","betavariate","paretovariate","weibullvariate",
-           "getstate","setstate","jumpahead"]
+           "getstate","setstate","jumpahead", "WichmannHill", "getrandbits",
+           "Random"]

 NV_MAGICCONST = 4 * _exp(-0.5)/_sqrt(2.0)
 TWOPI = 2.0*_pi
 LOG4 = _log(4.0)
 SG_MAGICCONST = 1.0 + _log(4.5)
+BPF = 53        # Number of bits in a float

 # Translated by Guido van Rossum from C source provided by
 # Adrian Baddeley.  Adapted by Raymond Hettinger for use with
@ -72,6 +76,8 @@ class Random(_random.Random):
    Class Random can also be subclassed if you want to use a different basic
    generator of your own devising: in that case, override the following
    methods:  random(), seed(), getstate(), setstate() and jumpahead().
+    Optionally, implement a getrandombits() method so that randrange()
+    can cover arbitrarily large ranges.

    """

@ -131,12 +137,13 @@ class Random(_random.Random):

 ## -------------------- integer methods  -------------------

-    def randrange(self, start, stop=None, step=1, int=int, default=None):
+    def randrange(self, start, stop=None, step=1, int=int, default=None,
+                  maxwidth=1L<<BPF):
        """Choose a random item from range(start, stop[, step]).

        This fixes the problem with randint() which includes the
        endpoint; in Python this is usually not what you want.
-        Do not supply the 'int' and 'default' arguments.
+        Do not supply the 'int', 'default', and 'maxwidth' arguments.
        """

        # This code is a bit messy to make it fast for the
@ -146,6 +153,8 @@ class Random(_random.Random):
            raise ValueError, "non-integer arg 1 for randrange()"
        if stop is default:
            if istart > 0:
+                if istart >= maxwidth:
+                    return self._randbelow(istart)
                return int(self.random() * istart)
            raise ValueError, "empty range for randrange()"

@ -153,36 +162,43 @@ class Random(_random.Random):
        istop = int(stop)
        if istop != stop:
            raise ValueError, "non-integer stop for randrange()"
-        if step == 1 and istart < istop:
+        width = istop - istart
+        if step == 1 and width > 0:
            # Note that
-            #     int(istart + self.random()*(istop - istart))
+            #     int(istart + self.random()*width)
            # instead would be incorrect.  For example, consider istart
            # = -2 and istop = 0.  Then the guts would be in
            # -2.0 to 0.0 exclusive on both ends (ignoring that random()
            # might return 0.0), and because int() truncates toward 0, the
            # final result would be -1 or 0 (instead of -2 or -1).
-            #     istart + int(self.random()*(istop - istart))
+            #     istart + int(self.random()*width)
            # would also be incorrect, for a subtler reason:  the RHS
            # can return a long, and then randrange() would also return
            # a long, but we're supposed to return an int (for backward
            # compatibility).
-            return int(istart + int(self.random()*(istop - istart)))
+
+            if width >= maxwidth:
+                    return int(istart + self._randbelow(width))
+            return int(istart + int(self.random()*width))
        if step == 1:
-            raise ValueError, "empty range for randrange()"
+            raise ValueError, "empty range for randrange() (%d,%d, %d)" % (istart, istop, width)

        # Non-unit step argument supplied.
        istep = int(step)
        if istep != step:
            raise ValueError, "non-integer step for randrange()"
        if istep > 0:
-            n = (istop - istart + istep - 1) / istep
+            n = (width + istep - 1) / istep
        elif istep < 0:
-            n = (istop - istart + istep + 1) / istep
+            n = (width + istep + 1) / istep
        else:
            raise ValueError, "zero step for randrange()"

        if n <= 0:
            raise ValueError, "empty range for randrange()"
+
+        if n >= maxwidth:
+            return istart + self._randbelow(n)
        return istart + istep*int(self.random() * n)

    def randint(self, a, b):
@ -191,6 +207,33 @@ class Random(_random.Random):

        return self.randrange(a, b+1)

+    def _randbelow(self, n, _log=_log, int=int, _maxwidth=1L<<BPF,
+                   _Method=_MethodType, _BuiltinMethod=_BuiltinMethodType):
+        """Return a random int in the range [0,n)
+
+        Handles the case where n has more bits than returned
+        by a single call to the underlying generator.
+        """
+
+        try:
+            getrandbits = self.getrandbits
+        except AttributeError:
+            pass
+        else:
+            # Only call self.getrandbits if the original random() builtin method
+            # has not been overridden or if a new getrandbits() was supplied.
+            # This assures that the two methods correspond.
+            if type(self.random) is _BuiltinMethod or type(getrandbits) is _Method:
+                k = int(1.00001 + _log(n-1, 2.0))   # 2**k > n-1 > 2**(k-2)
+                r = getrandbits(k)
+                while r >= n:
+                    r = getrandbits(k)
+                return r
+        if n >= _maxwidth:
+            _warn("Underlying random() generator does not supply \n"
+                "enough bits to choose from a population range this large")
+        return int(self.random() * n)
+
 ## -------------------- sequence methods  -------------------

    def choice(self, seq):
@ -757,6 +800,7 @@ weibullvariate = _inst.weibullvariate
 getstate = _inst.getstate
 setstate = _inst.setstate
 jumpahead = _inst.jumpahead
+getrandbits = _inst.getrandbits

 if __name__ == '__main__':
    _test()
--- a/Lib/test/test_random.py
+++ b/Lib/test/test_random.py
@ -4,6 +4,7 @@ import unittest
 import random
 import time
 import pickle
+import warnings
 from math import log, exp, sqrt, pi
 from sets import Set
 from test import test_support
@ -153,6 +154,13 @@ class WichmannHill_TestBasicOps(TestBasicOps):
            self.assertEqual(x1, x2)
            self.assertEqual(y1, y2)

+    def test_bigrand(self):
+        # Verify warnings are raised when randrange is too large for random()
+        oldfilters = warnings.filters[:]
+        warnings.filterwarnings("error", "Underlying random")
+        self.assertRaises(UserWarning, self.gen.randrange, 2**60)
+        warnings.filters[:] = oldfilters
+
 class MersenneTwister_TestBasicOps(TestBasicOps):
    gen = random.Random()

@ -219,6 +227,76 @@ class MersenneTwister_TestBasicOps(TestBasicOps):
        seed = (1L << (10000 * 8)) - 1  # about 10K bytes
        self.gen.seed(seed)

+    def test_53_bits_per_float(self):
+        # This should pass whenever a C double has 53 bit precision.
+        span = 2 ** 53
+        cum = 0
+        for i in xrange(100):
+            cum |= int(self.gen.random() * span)
+        self.assertEqual(cum, span-1)
+
+    def test_bigrand(self):
+        # The randrange routine should build-up the required number of bits
+        # in stages so that all bit positions are active.
+        span = 2 ** 500
+        cum = 0
+        for i in xrange(100):
+            r = self.gen.randrange(span)
+            self.assert_(0 <= r < span)
+            cum |= r
+        self.assertEqual(cum, span-1)
+
+    def test_bigrand_ranges(self):
+        for i in [40,80, 160, 200, 211, 250, 375, 512, 550]:
+            start = self.gen.randrange(2 ** i)
+            stop = self.gen.randrange(2 ** (i-2))
+            if stop <= start:
+                return
+            self.assert_(start <= self.gen.randrange(start, stop) < stop)
+
+    def test_rangelimits(self):
+        for start, stop in [(-2,0), (-(2**60)-2,-(2**60)), (2**60,2**60+2)]:
+            self.assertEqual(Set(range(start,stop)),
+                Set([self.gen.randrange(start,stop) for i in xrange(100)]))
+
+    def test_genrandbits(self):
+        # Verify cross-platform repeatability
+        self.gen.seed(1234567)
+        self.assertEqual(self.gen.getrandbits(100),
+                         97904845777343510404718956115L)
+        # Verify ranges
+        for k in xrange(1, 1000):
+            self.assert_(0 <= self.gen.getrandbits(k) < 2**k)
+
+        # Verify all bits active
+        getbits = self.gen.getrandbits
+        for span in [1, 2, 3, 4, 31, 32, 32, 52, 53, 54, 119, 127, 128, 129]:
+            cum = 0
+            for i in xrange(100):
+                cum |= getbits(span)
+            self.assertEqual(cum, 2**span-1)
+
+    def test_randbelow_logic(self, _log=log, int=int):
+        # check bitcount transition points:  2**i and 2**(i+1)-1
+        # show that: k = int(1.001 + _log(n, 2))
+        # is equal to or one greater than the number of bits in n
+        for i in xrange(1, 1000):
+            n = 1L << i # check an exact power of two
+            numbits = i+1
+            k = int(1.00001 + _log(n, 2))
+            self.assertEqual(k, numbits)
+            self.assert_(n == 2**(k-1))
+
+            n += n - 1      # check 1 below the next power of two
+            k = int(1.00001 + _log(n, 2))
+            self.assert_(k in [numbits, numbits+1])
+            self.assert_(2**k > n > 2**(k-2))
+
+            n -= n >> 15     # check a little farther below the next power of two
+            k = int(1.00001 + _log(n, 2))
+            self.assertEqual(k, numbits)        # note the stronger assertion
+            self.assert_(2**k > n > 2**(k-1))   # note the stronger assertion
+
 _gammacoeff = (0.9999999999995183, 676.5203681218835, -1259.139216722289,
              771.3234287757674,  -176.6150291498386, 12.50734324009056,
              -0.1385710331296526, 0.9934937113930748e-05, 0.1659470187408462e-06)
--- a/Misc/NEWS
+++ b/Misc/NEWS
@ -84,6 +84,14 @@ Library
  seed.  Modified to match Py2.2 behavior and use fractional seconds so
  that successive runs are more likely to produce different sequences.

+- random.Random has a new method, getrandbits(k), which returns an int
+  with k random bits.  This method is now an optional part of the API
+  for user defined generators.  Any generator that defines genrandbits()
+  can now use randrange() for ranges with a length >= 2**53.  Formerly,
+  randrange would return only even numbers for ranges that large (see
+  SF bug #812202).  Generators that do not define genrandbits() now
+  issue a warning when randrange() is called with a range that large.
+
 - itertools.izip() with no arguments now returns an empty iterator instead
  of raising a TypeError exception.

--- a/Modules/_randommodule.c
+++ b/Modules/_randommodule.c
@ -434,6 +434,47 @@ random_jumpahead(RandomObject *self, PyObject *n)
 	return Py_None;
 }

+static PyObject *
+random_getrandbits(RandomObject *self, PyObject *args)
+{
+	int k, i, bytes;
+	unsigned long r;
+	unsigned char *bytearray;
+	PyObject *result;
+
+	if (!PyArg_ParseTuple(args, "i:getrandbits", &k))
+		return NULL;
+
+	if (k <= 0) {
+		PyErr_SetString(PyExc_ValueError,
+				"number of bits must be greater than zero");
+		return NULL;
+	}
+
+	bytes = ((k - 1) / 32 + 1) * 4;
+	bytearray = (unsigned char *)PyMem_Malloc(bytes);
+	if (bytearray == NULL) {
+		PyErr_NoMemory();
+		return NULL;
+	}
+
+	/* Fill-out whole words, byte-by-byte to avoid endianness issues */
+	for (i=0 ; i<bytes ; i+=4, k-=32) {
+		r = genrand_int32(self);
+		if (k < 32)
+			r >>= (32 - k);
+		bytearray[i+0] = (unsigned char)r;
+		bytearray[i+1] = (unsigned char)(r >> 8);
+		bytearray[i+2] = (unsigned char)(r >> 16);
+		bytearray[i+3] = (unsigned char)(r >> 24);
+	}
+
+	/* little endian order to match bytearray assignment order */
+	result = _PyLong_FromByteArray(bytearray, bytes, 1, 0);
+	PyMem_Free(bytearray);
+	return result;
+}
+
 static PyObject *
 random_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
 {
@ -464,6 +505,9 @@ static PyMethodDef random_methods[] = {
 	{"jumpahead",	(PyCFunction)random_jumpahead,	METH_O,
 		PyDoc_STR("jumpahead(int) -> None.  Create new state from "
 			  "existing state and integer.")},
+	{"getrandbits",	(PyCFunction)random_getrandbits,  METH_VARARGS,
+		PyDoc_STR("getrandbits(k) -> x.  Generates a long int with "
+			  "k random bits.")},
 	{NULL,		NULL}		/* sentinel */
 };