From ef19bad7d6da99575d66c1f5dc8fd6ac57e92f6e Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Thu, 25 Jun 2020 17:03:50 -0700 Subject: [PATCH] Improve code organization for the random module (GH-21161) --- Lib/random.py | 353 +++++++++++++++++++++++++------------------------- 1 file changed, 176 insertions(+), 177 deletions(-) diff --git a/Lib/random.py b/Lib/random.py index ae7b5cf4e72..a6454f520df 100644 --- a/Lib/random.py +++ b/Lib/random.py @@ -1,5 +1,9 @@ """Random variable generators. + bytes + ----- + uniform bytes (values between 0 and 255) + integers -------- uniform within range @@ -37,6 +41,10 @@ General notes on the underlying Mersenne Twister core generator: """ +# Translated by Guido van Rossum from C source provided by +# Adrian Baddeley. Adapted by Raymond Hettinger for use with +# the Mersenne Twister and os.urandom() core generators. + from warnings import warn as _warn from math import log as _log, exp as _exp, pi as _pi, e as _e, ceil as _ceil from math import sqrt as _sqrt, acos as _acos, cos as _cos, sin as _sin @@ -46,6 +54,7 @@ from _collections_abc import Set as _Set, Sequence as _Sequence from itertools import accumulate as _accumulate, repeat as _repeat from bisect import bisect as _bisect import os as _os +import _random try: # hashlib is pretty heavy to load, try lean internal module first @@ -54,7 +63,6 @@ except ImportError: # fallback to official implementation from hashlib import sha512 as _sha512 - __all__ = [ "Random", "SystemRandom", @@ -89,13 +97,6 @@ BPF = 53 # Number of bits in a float RECIP_BPF = 2 ** -BPF -# Translated by Guido van Rossum from C source provided by -# Adrian Baddeley. Adapted by Raymond Hettinger for use with -# the Mersenne Twister and os.urandom() core generators. - -import _random - - class Random(_random.Random): """Random number generator base class used by bound module functions. 
@@ -121,26 +122,6 @@ class Random(_random.Random): self.seed(x) self.gauss_next = None - def __init_subclass__(cls, /, **kwargs): - """Control how subclasses generate random integers. - - The algorithm a subclass can use depends on the random() and/or - getrandbits() implementation available to it and determines - whether it can generate random integers from arbitrarily large - ranges. - """ - - for c in cls.__mro__: - if '_randbelow' in c.__dict__: - # just inherit it - break - if 'getrandbits' in c.__dict__: - cls._randbelow = cls._randbelow_with_getrandbits - break - if 'random' in c.__dict__: - cls._randbelow = cls._randbelow_without_getrandbits - break - def seed(self, a=None, version=2): """Initialize internal state from a seed. @@ -210,14 +191,11 @@ class Random(_random.Random): "Random.setstate() of version %s" % (version, self.VERSION)) - ## ---- Methods below this point do not need to be overridden when - ## ---- subclassing for the purpose of using a different core generator. - ## -------------------- bytes methods --------------------- + ## ------------------------------------------------------- + ## ---- Methods below this point do not need to be overridden or extended + ## ---- when subclassing for the purpose of using a different core generator. - def randbytes(self, n): - """Generate n random bytes.""" - return self.getrandbits(n * 8).to_bytes(n, 'little') ## -------------------- pickle support ------------------- @@ -233,6 +211,80 @@ class Random(_random.Random): def __reduce__(self): return self.__class__, (), self.getstate() + + ## ---- internal support method for evenly distributed integers ---- + + def __init_subclass__(cls, /, **kwargs): + """Control how subclasses generate random integers. + + The algorithm a subclass can use depends on the random() and/or + getrandbits() implementation available to it and determines + whether it can generate random integers from arbitrarily large + ranges. 
+ """ + + for c in cls.__mro__: + if '_randbelow' in c.__dict__: + # just inherit it + break + if 'getrandbits' in c.__dict__: + cls._randbelow = cls._randbelow_with_getrandbits + break + if 'random' in c.__dict__: + cls._randbelow = cls._randbelow_without_getrandbits + break + + def _randbelow_with_getrandbits(self, n): + "Return a random int in the range [0,n). Returns 0 if n==0." + + if not n: + return 0 + getrandbits = self.getrandbits + k = n.bit_length() # don't use (n-1) here because n can be 1 + r = getrandbits(k) # 0 <= r < 2**k + while r >= n: + r = getrandbits(k) + return r + + def _randbelow_without_getrandbits(self, n, maxsize=1<<BPF): + """Return a random int in the range [0,n). Returns 0 if n==0. + + The implementation does not use getrandbits, but only random. + """ + + random = self.random + if n >= maxsize: + _warn("Underlying random() generator does not supply \n" + "enough bits to choose from a population range this large.\n" + "To remove the range limitation, add a getrandbits() method.") + return _floor(random() * n) + if n == 0: + return 0 + rem = maxsize % n + limit = (maxsize - rem) / maxsize # int(limit * maxsize) % n == 0 + r = random() + while r >= limit: + r = random() + return _floor(r * maxsize) % n + + _randbelow = _randbelow_with_getrandbits + + + ## -------------------------------------------------------- + ## ---- Methods below this point generate custom distributions + ## ---- based on the methods defined above. They do not + ## ---- directly touch the underlying generator and only + ## ---- access randomness through the methods: random(), + ## ---- getrandbits(), or _randbelow(). + + + ## -------------------- bytes methods --------------------- + + def randbytes(self, n): + """Generate n random bytes.""" + return self.getrandbits(n * 8).to_bytes(n, 'little') + + ## -------------------- integer methods ------------------- def randrange(self, start, stop=None, step=1): @@ -285,40 +337,6 @@ class Random(_random.Random): return self.randrange(a, b+1) - def _randbelow_with_getrandbits(self, n): - "Return a random int in the range [0,n). Returns 0 if n==0." 
- - if not n: - return 0 - getrandbits = self.getrandbits - k = n.bit_length() # don't use (n-1) here because n can be 1 - r = getrandbits(k) # 0 <= r < 2**k - while r >= n: - r = getrandbits(k) - return r - - def _randbelow_without_getrandbits(self, n, maxsize=1<<BPF): - """Return a random int in the range [0,n). Returns 0 if n==0. - - The implementation does not use getrandbits, but only random. - """ - - random = self.random - if n >= maxsize: - _warn("Underlying random() generator does not supply \n" - "enough bits to choose from a population range this large.\n" - "To remove the range limitation, add a getrandbits() method.") - return _floor(random() * n) - if n == 0: - return 0 - rem = maxsize % n - limit = (maxsize - rem) / maxsize # int(limit * maxsize) % n == 0 - r = random() - while r >= limit: - r = random() - return _floor(r * maxsize) % n - - _randbelow = _randbelow_with_getrandbits ## -------------------- sequence methods ------------------- @@ -479,16 +497,13 @@ class Random(_random.Random): return [population[bisect(cum_weights, random() * total, 0, hi)] for i in _repeat(None, k)] - ## -------------------- real-valued distributions ------------------- - ## -------------------- uniform distribution ------------------- + ## -------------------- real-valued distributions ------------------- def uniform(self, a, b): "Get a random number in the range [a, b) or [a, b] depending on rounding." return a + (b - a) * self.random() - ## -------------------- triangular -------------------- - def triangular(self, low=0.0, high=1.0, mode=None): """Triangular distribution. @@ -509,16 +524,12 @@ class Random(_random.Random): low, high = high, low return low + (high - low) * _sqrt(u * c) - ## -------------------- normal distribution -------------------- - def normalvariate(self, mu, sigma): """Normal distribution. mu is the mean, and sigma is the standard deviation. """ - # mu = mean, sigma = standard deviation - # Uses Kinderman and Monahan method. Reference: Kinderman, # A.J. 
and Monahan, J.F., "Computer generation of random # variables using the ratio of uniform deviates", ACM Trans @@ -534,7 +545,43 @@ class Random(_random.Random): break return mu + z * sigma - ## -------------------- lognormal distribution -------------------- + def gauss(self, mu, sigma): + """Gaussian distribution. + + mu is the mean, and sigma is the standard deviation. This is + slightly faster than the normalvariate() function. + + Not thread-safe without a lock around calls. + + """ + # When x and y are two variables from [0, 1), uniformly + # distributed, then + # + # cos(2*pi*x)*sqrt(-2*log(1-y)) + # sin(2*pi*x)*sqrt(-2*log(1-y)) + # + # are two *independent* variables with normal distribution + # (mu = 0, sigma = 1). + # (Lambert Meertens) + # (corrected version; bug discovered by Mike Miller, fixed by LM) + + # Multithreading note: When two threads call this function + # simultaneously, it is possible that they will receive the + # same return value. The window is very small though. To + # avoid this, you have to use a lock around all calls. (I + # didn't want to slow this down in the serial case by using a + # lock here.) + + random = self.random + z = self.gauss_next + self.gauss_next = None + if z is None: + x2pi = random() * TWOPI + g2rad = _sqrt(-2.0 * _log(1.0 - random())) + z = _cos(x2pi) * g2rad + self.gauss_next = _sin(x2pi) * g2rad + + return mu + z * sigma def lognormvariate(self, mu, sigma): """Log normal distribution. @@ -546,8 +593,6 @@ class Random(_random.Random): """ return _exp(self.normalvariate(mu, sigma)) - ## -------------------- exponential distribution -------------------- - def expovariate(self, lambd): """Exponential distribution. @@ -565,8 +610,6 @@ class Random(_random.Random): # possibility of taking the log of zero. return -_log(1.0 - self.random()) / lambd - ## -------------------- von Mises distribution -------------------- - def vonmisesvariate(self, mu, kappa): """Circular data distribution. 
@@ -576,10 +619,6 @@ class Random(_random.Random): to a uniform random angle over the range 0 to 2*pi. """ - # mu: mean angle (in radians between 0 and 2*pi) - # kappa: concentration parameter kappa (>= 0) - # if kappa = 0 generate uniform random angle - # Based upon an algorithm published in: Fisher, N.I., # "Statistical Analysis of Circular Data", Cambridge # University Press, 1993. @@ -613,8 +652,6 @@ class Random(_random.Random): return theta - ## -------------------- gamma distribution -------------------- - def gammavariate(self, alpha, beta): """Gamma distribution. Not the gamma function! @@ -627,7 +664,6 @@ class Random(_random.Random): math.gamma(alpha) * beta ** alpha """ - # alpha > 0, beta > 0, mean is alpha*beta, variance is alpha*beta**2 # Warning: a few older sources define the gamma distribution in terms @@ -681,61 +717,6 @@ class Random(_random.Random): break return x * beta - ## -------------------- Gauss (faster alternative) -------------------- - - def gauss(self, mu, sigma): - """Gaussian distribution. - - mu is the mean, and sigma is the standard deviation. This is - slightly faster than the normalvariate() function. - - Not thread-safe without a lock around calls. - - """ - - # When x and y are two variables from [0, 1), uniformly - # distributed, then - # - # cos(2*pi*x)*sqrt(-2*log(1-y)) - # sin(2*pi*x)*sqrt(-2*log(1-y)) - # - # are two *independent* variables with normal distribution - # (mu = 0, sigma = 1). - # (Lambert Meertens) - # (corrected version; bug discovered by Mike Miller, fixed by LM) - - # Multithreading note: When two threads call this function - # simultaneously, it is possible that they will receive the - # same return value. The window is very small though. To - # avoid this, you have to use a lock around all calls. (I - # didn't want to slow this down in the serial case by using a - # lock here.) 
- - random = self.random - z = self.gauss_next - self.gauss_next = None - if z is None: - x2pi = random() * TWOPI - g2rad = _sqrt(-2.0 * _log(1.0 - random())) - z = _cos(x2pi) * g2rad - self.gauss_next = _sin(x2pi) * g2rad - - return mu + z * sigma - - ## -------------------- beta -------------------- - ## See - ## http://mail.python.org/pipermail/python-bugs-list/2001-January/003752.html - ## for Ivan Frohne's insightful analysis of why the original implementation: - ## - ## def betavariate(self, alpha, beta): - ## # Discrete Event Simulation in C, pp 87-88. - ## - ## y = self.expovariate(alpha) - ## z = self.expovariate(1.0/beta) - ## return z/(y+z) - ## - ## was dead wrong, and how it probably got that way. - def betavariate(self, alpha, beta): """Beta distribution. @@ -743,6 +724,18 @@ class Random(_random.Random): Returned values range between 0 and 1. """ + ## See + ## http://mail.python.org/pipermail/python-bugs-list/2001-January/003752.html + ## for Ivan Frohne's insightful analysis of why the original implementation: + ## + ## def betavariate(self, alpha, beta): + ## # Discrete Event Simulation in C, pp 87-88. + ## + ## y = self.expovariate(alpha) + ## z = self.expovariate(1.0/beta) + ## return z/(y+z) + ## + ## was dead wrong, and how it probably got that way. # This version due to Janne Sinkkonen, and matches all the std # texts (e.g., Knuth Vol 2 Ed 3 pg 134 "the beta distribution"). @@ -751,8 +744,6 @@ class Random(_random.Random): return y / (y + self.gammavariate(beta, 1.0)) return 0.0 - ## -------------------- Pareto -------------------- - def paretovariate(self, alpha): """Pareto distribution. alpha is the shape parameter.""" # Jain, pg. 495 @@ -760,8 +751,6 @@ class Random(_random.Random): u = 1.0 - self.random() return 1.0 / u ** (1.0 / alpha) - ## -------------------- Weibull -------------------- - def weibullvariate(self, alpha, beta): """Weibull distribution. 
@@ -774,14 +763,17 @@ class Random(_random.Random): return alpha * (-_log(u)) ** (1.0 / beta) +## ------------------------------------------------------------------ ## --------------- Operating System Random Source ------------------ + class SystemRandom(Random): """Alternate random number generator using sources provided by the operating system (such as /dev/urandom on Unix or CryptGenRandom on Windows). Not available on all systems (see os.urandom() for details). + """ def random(self): @@ -812,7 +804,41 @@ class SystemRandom(Random): getstate = setstate = _notimplemented -## -------------------- test program -------------------- +# ---------------------------------------------------------------------- +# Create one instance, seeded from current time, and export its methods +# as module-level functions. The functions share state across all uses +# (both in the user's code and in the Python libraries), but that's fine +# for most programs and is easier for the casual user than making them +# instantiate their own Random() instance. 
+ +_inst = Random() +seed = _inst.seed +random = _inst.random +uniform = _inst.uniform +triangular = _inst.triangular +randint = _inst.randint +choice = _inst.choice +randrange = _inst.randrange +sample = _inst.sample +shuffle = _inst.shuffle +choices = _inst.choices +normalvariate = _inst.normalvariate +lognormvariate = _inst.lognormvariate +expovariate = _inst.expovariate +vonmisesvariate = _inst.vonmisesvariate +gammavariate = _inst.gammavariate +gauss = _inst.gauss +betavariate = _inst.betavariate +paretovariate = _inst.paretovariate +weibullvariate = _inst.weibullvariate +getstate = _inst.getstate +setstate = _inst.setstate +getrandbits = _inst.getrandbits +randbytes = _inst.randbytes + + +## ------------------------------------------------------ +## ----------------- test program ----------------------- def _test_generator(n, func, args): from statistics import stdev, fmean as mean @@ -849,36 +875,9 @@ def _test(N=2000): _test_generator(N, betavariate, (3.0, 3.0)) _test_generator(N, triangular, (0.0, 1.0, 1.0 / 3.0)) -# Create one instance, seeded from current time, and export its methods -# as module-level functions. The functions share state across all uses -# (both in the user's code and in the Python libraries), but that's fine -# for most programs and is easier for the casual user than making them -# instantiate their own Random() instance. 
-_inst = Random() -seed = _inst.seed -random = _inst.random -uniform = _inst.uniform -triangular = _inst.triangular -randint = _inst.randint -choice = _inst.choice -randrange = _inst.randrange -sample = _inst.sample -shuffle = _inst.shuffle -choices = _inst.choices -normalvariate = _inst.normalvariate -lognormvariate = _inst.lognormvariate -expovariate = _inst.expovariate -vonmisesvariate = _inst.vonmisesvariate -gammavariate = _inst.gammavariate -gauss = _inst.gauss -betavariate = _inst.betavariate -paretovariate = _inst.paretovariate -weibullvariate = _inst.weibullvariate -getstate = _inst.getstate -setstate = _inst.setstate -getrandbits = _inst.getrandbits -randbytes = _inst.randbytes +## ------------------------------------------------------ +## ------------------ fork support --------------------- if hasattr(_os, "fork"): _os.register_at_fork(after_in_child=_inst.seed)