SF bug #812202: randint is always even

* Added C coded getrandbits(k) method that runs in linear time.
* Call the new method from randrange() for ranges >= 2**53.
* Adds a warning for generators not defining getrandbits() whenever they
  have a call to randrange() with too large of a population.
This commit is contained in:
Raymond Hettinger 2003-10-05 09:09:15 +00:00
parent 5c68ef04b7
commit 2f726e9093
5 changed files with 196 additions and 10 deletions

View File

@ -41,6 +41,10 @@ Class \class{Random} can also be subclassed if you want to use a
different basic generator of your own devising: in that case, override different basic generator of your own devising: in that case, override
the \method{random()}, \method{seed()}, \method{getstate()}, the \method{random()}, \method{seed()}, \method{getstate()},
\method{setstate()} and \method{jumpahead()} methods. \method{setstate()} and \method{jumpahead()} methods.
Optionally, a new generator can supply a \method{getrandombits()}
method --- this allows \method{randrange()} to produce selections
over an arbitrarily large range.
\versionadded[the \method{getrandombits()} method]{2.4}
As an example of subclassing, the \module{random} module provides As an example of subclassing, the \module{random} module provides
the \class{WichmannHill} class which implements an alternative generator the \class{WichmannHill} class which implements an alternative generator
@ -92,6 +96,14 @@ Bookkeeping functions:
separated by many steps.]{2.3} separated by many steps.]{2.3}
\end{funcdesc} \end{funcdesc}
\begin{funcdesc}{getrandbits}{k}
Returns a python \class{long} int with \var{k} random bits.
This method is supplied with the MersenneTwister generator and some
other generators may also provide it as an optional part of the API.
When available, \method{getrandbits()} enables \method{randrange()}
to handle arbitrarily large ranges.
\versionadded{2.4}
\end{funcdesc}
Functions for integers: Functions for integers:

View File

@ -39,6 +39,8 @@ General notes on the underlying Mersenne Twister core generator:
""" """
from warnings import warn as _warn
from types import MethodType as _MethodType, BuiltinMethodType as _BuiltinMethodType
from math import log as _log, exp as _exp, pi as _pi, e as _e from math import log as _log, exp as _exp, pi as _pi, e as _e
from math import sqrt as _sqrt, acos as _acos, cos as _cos, sin as _sin from math import sqrt as _sqrt, acos as _acos, cos as _cos, sin as _sin
from math import floor as _floor from math import floor as _floor
@ -47,12 +49,14 @@ __all__ = ["Random","seed","random","uniform","randint","choice","sample",
"randrange","shuffle","normalvariate","lognormvariate", "randrange","shuffle","normalvariate","lognormvariate",
"expovariate","vonmisesvariate","gammavariate", "expovariate","vonmisesvariate","gammavariate",
"gauss","betavariate","paretovariate","weibullvariate", "gauss","betavariate","paretovariate","weibullvariate",
"getstate","setstate","jumpahead"] "getstate","setstate","jumpahead", "WichmannHill", "getrandbits",
"Random"]
NV_MAGICCONST = 4 * _exp(-0.5)/_sqrt(2.0) NV_MAGICCONST = 4 * _exp(-0.5)/_sqrt(2.0)
TWOPI = 2.0*_pi TWOPI = 2.0*_pi
LOG4 = _log(4.0) LOG4 = _log(4.0)
SG_MAGICCONST = 1.0 + _log(4.5) SG_MAGICCONST = 1.0 + _log(4.5)
BPF = 53 # Number of bits in a float
# Translated by Guido van Rossum from C source provided by # Translated by Guido van Rossum from C source provided by
# Adrian Baddeley. Adapted by Raymond Hettinger for use with # Adrian Baddeley. Adapted by Raymond Hettinger for use with
@ -72,6 +76,8 @@ class Random(_random.Random):
Class Random can also be subclassed if you want to use a different basic Class Random can also be subclassed if you want to use a different basic
generator of your own devising: in that case, override the following generator of your own devising: in that case, override the following
methods: random(), seed(), getstate(), setstate() and jumpahead(). methods: random(), seed(), getstate(), setstate() and jumpahead().
Optionally, implement a getrandombits() method so that randrange()
can cover arbitrarily large ranges.
""" """
@ -131,12 +137,13 @@ class Random(_random.Random):
## -------------------- integer methods ------------------- ## -------------------- integer methods -------------------
def randrange(self, start, stop=None, step=1, int=int, default=None): def randrange(self, start, stop=None, step=1, int=int, default=None,
maxwidth=1L<<BPF):
"""Choose a random item from range(start, stop[, step]). """Choose a random item from range(start, stop[, step]).
This fixes the problem with randint() which includes the This fixes the problem with randint() which includes the
endpoint; in Python this is usually not what you want. endpoint; in Python this is usually not what you want.
Do not supply the 'int' and 'default' arguments. Do not supply the 'int', 'default', and 'maxwidth' arguments.
""" """
# This code is a bit messy to make it fast for the # This code is a bit messy to make it fast for the
@ -146,6 +153,8 @@ class Random(_random.Random):
raise ValueError, "non-integer arg 1 for randrange()" raise ValueError, "non-integer arg 1 for randrange()"
if stop is default: if stop is default:
if istart > 0: if istart > 0:
if istart >= maxwidth:
return self._randbelow(istart)
return int(self.random() * istart) return int(self.random() * istart)
raise ValueError, "empty range for randrange()" raise ValueError, "empty range for randrange()"
@ -153,36 +162,43 @@ class Random(_random.Random):
istop = int(stop) istop = int(stop)
if istop != stop: if istop != stop:
raise ValueError, "non-integer stop for randrange()" raise ValueError, "non-integer stop for randrange()"
if step == 1 and istart < istop: width = istop - istart
if step == 1 and width > 0:
# Note that # Note that
# int(istart + self.random()*(istop - istart)) # int(istart + self.random()*width)
# instead would be incorrect. For example, consider istart # instead would be incorrect. For example, consider istart
# = -2 and istop = 0. Then the guts would be in # = -2 and istop = 0. Then the guts would be in
# -2.0 to 0.0 exclusive on both ends (ignoring that random() # -2.0 to 0.0 exclusive on both ends (ignoring that random()
# might return 0.0), and because int() truncates toward 0, the # might return 0.0), and because int() truncates toward 0, the
# final result would be -1 or 0 (instead of -2 or -1). # final result would be -1 or 0 (instead of -2 or -1).
# istart + int(self.random()*(istop - istart)) # istart + int(self.random()*width)
# would also be incorrect, for a subtler reason: the RHS # would also be incorrect, for a subtler reason: the RHS
# can return a long, and then randrange() would also return # can return a long, and then randrange() would also return
# a long, but we're supposed to return an int (for backward # a long, but we're supposed to return an int (for backward
# compatibility). # compatibility).
return int(istart + int(self.random()*(istop - istart)))
if width >= maxwidth:
return int(istart + self._randbelow(width))
return int(istart + int(self.random()*width))
if step == 1: if step == 1:
raise ValueError, "empty range for randrange()" raise ValueError, "empty range for randrange() (%d,%d, %d)" % (istart, istop, width)
# Non-unit step argument supplied. # Non-unit step argument supplied.
istep = int(step) istep = int(step)
if istep != step: if istep != step:
raise ValueError, "non-integer step for randrange()" raise ValueError, "non-integer step for randrange()"
if istep > 0: if istep > 0:
n = (istop - istart + istep - 1) / istep n = (width + istep - 1) / istep
elif istep < 0: elif istep < 0:
n = (istop - istart + istep + 1) / istep n = (width + istep + 1) / istep
else: else:
raise ValueError, "zero step for randrange()" raise ValueError, "zero step for randrange()"
if n <= 0: if n <= 0:
raise ValueError, "empty range for randrange()" raise ValueError, "empty range for randrange()"
if n >= maxwidth:
return istart + self._randbelow(n)
return istart + istep*int(self.random() * n) return istart + istep*int(self.random() * n)
def randint(self, a, b): def randint(self, a, b):
@ -191,6 +207,33 @@ class Random(_random.Random):
return self.randrange(a, b+1) return self.randrange(a, b+1)
def _randbelow(self, n, _log=_log, int=int, _maxwidth=1L<<BPF,
_Method=_MethodType, _BuiltinMethod=_BuiltinMethodType):
"""Return a random int in the range [0,n)
Handles the case where n has more bits than returned
by a single call to the underlying generator.
"""
try:
getrandbits = self.getrandbits
except AttributeError:
pass
else:
# Only call self.getrandbits if the original random() builtin method
# has not been overridden or if a new getrandbits() was supplied.
# This assures that the two methods correspond.
if type(self.random) is _BuiltinMethod or type(getrandbits) is _Method:
k = int(1.00001 + _log(n-1, 2.0)) # 2**k > n-1 > 2**(k-2)
r = getrandbits(k)
while r >= n:
r = getrandbits(k)
return r
if n >= _maxwidth:
_warn("Underlying random() generator does not supply \n"
"enough bits to choose from a population range this large")
return int(self.random() * n)
## -------------------- sequence methods ------------------- ## -------------------- sequence methods -------------------
def choice(self, seq): def choice(self, seq):
@ -757,6 +800,7 @@ weibullvariate = _inst.weibullvariate
getstate = _inst.getstate getstate = _inst.getstate
setstate = _inst.setstate setstate = _inst.setstate
jumpahead = _inst.jumpahead jumpahead = _inst.jumpahead
getrandbits = _inst.getrandbits
if __name__ == '__main__': if __name__ == '__main__':
_test() _test()

View File

@ -4,6 +4,7 @@ import unittest
import random import random
import time import time
import pickle import pickle
import warnings
from math import log, exp, sqrt, pi from math import log, exp, sqrt, pi
from sets import Set from sets import Set
from test import test_support from test import test_support
@ -153,6 +154,13 @@ class WichmannHill_TestBasicOps(TestBasicOps):
self.assertEqual(x1, x2) self.assertEqual(x1, x2)
self.assertEqual(y1, y2) self.assertEqual(y1, y2)
def test_bigrand(self):
# Verify warnings are raised when randrange is too large for random()
oldfilters = warnings.filters[:]
warnings.filterwarnings("error", "Underlying random")
self.assertRaises(UserWarning, self.gen.randrange, 2**60)
warnings.filters[:] = oldfilters
class MersenneTwister_TestBasicOps(TestBasicOps): class MersenneTwister_TestBasicOps(TestBasicOps):
gen = random.Random() gen = random.Random()
@ -219,6 +227,76 @@ class MersenneTwister_TestBasicOps(TestBasicOps):
seed = (1L << (10000 * 8)) - 1 # about 10K bytes seed = (1L << (10000 * 8)) - 1 # about 10K bytes
self.gen.seed(seed) self.gen.seed(seed)
def test_53_bits_per_float(self):
# This should pass whenever a C double has 53 bit precision.
span = 2 ** 53
cum = 0
for i in xrange(100):
cum |= int(self.gen.random() * span)
self.assertEqual(cum, span-1)
def test_bigrand(self):
# The randrange routine should build-up the required number of bits
# in stages so that all bit positions are active.
span = 2 ** 500
cum = 0
for i in xrange(100):
r = self.gen.randrange(span)
self.assert_(0 <= r < span)
cum |= r
self.assertEqual(cum, span-1)
def test_bigrand_ranges(self):
for i in [40,80, 160, 200, 211, 250, 375, 512, 550]:
start = self.gen.randrange(2 ** i)
stop = self.gen.randrange(2 ** (i-2))
if stop <= start:
return
self.assert_(start <= self.gen.randrange(start, stop) < stop)
def test_rangelimits(self):
for start, stop in [(-2,0), (-(2**60)-2,-(2**60)), (2**60,2**60+2)]:
self.assertEqual(Set(range(start,stop)),
Set([self.gen.randrange(start,stop) for i in xrange(100)]))
def test_genrandbits(self):
# Verify cross-platform repeatability
self.gen.seed(1234567)
self.assertEqual(self.gen.getrandbits(100),
97904845777343510404718956115L)
# Verify ranges
for k in xrange(1, 1000):
self.assert_(0 <= self.gen.getrandbits(k) < 2**k)
# Verify all bits active
getbits = self.gen.getrandbits
for span in [1, 2, 3, 4, 31, 32, 32, 52, 53, 54, 119, 127, 128, 129]:
cum = 0
for i in xrange(100):
cum |= getbits(span)
self.assertEqual(cum, 2**span-1)
def test_randbelow_logic(self, _log=log, int=int):
# check bitcount transition points: 2**i and 2**(i+1)-1
# show that: k = int(1.001 + _log(n, 2))
# is equal to or one greater than the number of bits in n
for i in xrange(1, 1000):
n = 1L << i # check an exact power of two
numbits = i+1
k = int(1.00001 + _log(n, 2))
self.assertEqual(k, numbits)
self.assert_(n == 2**(k-1))
n += n - 1 # check 1 below the next power of two
k = int(1.00001 + _log(n, 2))
self.assert_(k in [numbits, numbits+1])
self.assert_(2**k > n > 2**(k-2))
n -= n >> 15 # check a little farther below the next power of two
k = int(1.00001 + _log(n, 2))
self.assertEqual(k, numbits) # note the stronger assertion
self.assert_(2**k > n > 2**(k-1)) # note the stronger assertion
_gammacoeff = (0.9999999999995183, 676.5203681218835, -1259.139216722289, _gammacoeff = (0.9999999999995183, 676.5203681218835, -1259.139216722289,
771.3234287757674, -176.6150291498386, 12.50734324009056, 771.3234287757674, -176.6150291498386, 12.50734324009056,
-0.1385710331296526, 0.9934937113930748e-05, 0.1659470187408462e-06) -0.1385710331296526, 0.9934937113930748e-05, 0.1659470187408462e-06)

View File

@ -84,6 +84,14 @@ Library
seed. Modified to match Py2.2 behavior and use fractional seconds so seed. Modified to match Py2.2 behavior and use fractional seconds so
that successive runs are more likely to produce different sequences. that successive runs are more likely to produce different sequences.
- random.Random has a new method, getrandbits(k), which returns an int
with k random bits. This method is now an optional part of the API
for user defined generators. Any generator that defines genrandbits()
can now use randrange() for ranges with a length >= 2**53. Formerly,
randrange would return only even numbers for ranges that large (see
SF bug #812202). Generators that do not define genrandbits() now
issue a warning when randrange() is called with a range that large.
- itertools.izip() with no arguments now returns an empty iterator instead - itertools.izip() with no arguments now returns an empty iterator instead
of raising a TypeError exception. of raising a TypeError exception.

View File

@ -434,6 +434,47 @@ random_jumpahead(RandomObject *self, PyObject *n)
return Py_None; return Py_None;
} }
static PyObject *
random_getrandbits(RandomObject *self, PyObject *args)
{
int k, i, bytes;
unsigned long r;
unsigned char *bytearray;
PyObject *result;
if (!PyArg_ParseTuple(args, "i:getrandbits", &k))
return NULL;
if (k <= 0) {
PyErr_SetString(PyExc_ValueError,
"number of bits must be greater than zero");
return NULL;
}
bytes = ((k - 1) / 32 + 1) * 4;
bytearray = (unsigned char *)PyMem_Malloc(bytes);
if (bytearray == NULL) {
PyErr_NoMemory();
return NULL;
}
/* Fill-out whole words, byte-by-byte to avoid endianness issues */
for (i=0 ; i<bytes ; i+=4, k-=32) {
r = genrand_int32(self);
if (k < 32)
r >>= (32 - k);
bytearray[i+0] = (unsigned char)r;
bytearray[i+1] = (unsigned char)(r >> 8);
bytearray[i+2] = (unsigned char)(r >> 16);
bytearray[i+3] = (unsigned char)(r >> 24);
}
/* little endian order to match bytearray assignment order */
result = _PyLong_FromByteArray(bytearray, bytes, 1, 0);
PyMem_Free(bytearray);
return result;
}
static PyObject * static PyObject *
random_new(PyTypeObject *type, PyObject *args, PyObject *kwds) random_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{ {
@ -464,6 +505,9 @@ static PyMethodDef random_methods[] = {
{"jumpahead", (PyCFunction)random_jumpahead, METH_O, {"jumpahead", (PyCFunction)random_jumpahead, METH_O,
PyDoc_STR("jumpahead(int) -> None. Create new state from " PyDoc_STR("jumpahead(int) -> None. Create new state from "
"existing state and integer.")}, "existing state and integer.")},
{"getrandbits", (PyCFunction)random_getrandbits, METH_VARARGS,
PyDoc_STR("getrandbits(k) -> x. Generates a long int with "
"k random bits.")},
{NULL, NULL} /* sentinel */ {NULL, NULL} /* sentinel */
}; };