From 0de88fc4b108751b86443852b6741680d704168f Mon Sep 17 00:00:00 2001 From: Tim Peters Date: Thu, 1 Feb 2001 04:59:18 +0000 Subject: [PATCH] Change random.seed() so that it can get at the full range of possible internal states. Put the old .seed() (which could only get at about the square root of the # of possibilities) under the new name .whseed(), for bit-level compatibility with older versions. This occurred to me while reviewing effbot's book (he found himself stumbling over .seed() more than once there ...). --- Doc/lib/librandom.tex | 43 +++++++++++------ Lib/random.py | 104 +++++++++++++++++++++++++++--------------- Misc/NEWS | 10 ++++ 3 files changed, 106 insertions(+), 51 deletions(-) diff --git a/Doc/lib/librandom.tex b/Doc/lib/librandom.tex index d271c57d260..862d4664a70 100644 --- a/Doc/lib/librandom.tex +++ b/Doc/lib/librandom.tex @@ -25,7 +25,8 @@ numbers it generates before repeating the sequence exactly) is 6,953,607,871,644. While of much higher quality than the \function{rand()} function supplied by most C libraries, the theoretical properties are much the same as for a single linear congruential generator of -large modulus. +large modulus. It is not suitable for all purposes, and is completely +unsuitable for cryptographic purposes. The functions in this module are not threadsafe: if you want to call these functions from multiple threads, you should explicitly serialize the calls. @@ -72,7 +73,7 @@ gens = create_generators(10, 1000000) That creates 10 distinct generators, which can be passed out to 10 distinct threads. The generators don't share state so can be called safely in parallel. So long as no thread calls its \code{g.random()} more than a -million times (the second argument to \function{create_generators}), the +million times (the second argument to \function{create_generators}), the sequences seen by each thread will not overlap. The period of the underlying Wichmann-Hill generator limits how far this technique can be pushed. 
@@ -83,10 +84,10 @@ also be used to "move backward in time": \begin{verbatim} >>> g = Random(42) # arbitrary >>> g.random() -0.24855401895528142 +0.25420336316883324 >>> g.jumpahead(6953607871644L - 1) # move *back* one >>> g.random() -0.24855401895528142 +0.25420336316883324 \end{verbatim} @@ -94,25 +95,38 @@ Bookkeeping functions: \begin{funcdesc}{seed}{\optional{x}} Initialize the basic random number generator. - Optional argument \var{x} can be any hashable object, - and the generator is seeded from its hash code. - It is not guaranteed that distinct hash codes will produce distinct - seeds. - If \var{x} is omitted or \code{None}, - the seed is derived from the current system time. - The seed is also set from the current system time when - the module is first imported. + Optional argument \var{x} can be any hashable object. + If \var{x} is omitted or \code{None}, current system time is used; + current system time is also used to initialize the generator when the + module is first imported. + If \var{x} is not \code{None} or an int or long, + \code{hash(\var{x})} is used instead. + If \var{x} is an int or long, \var{x} is used directly. + Distinct values between 0 and 27814431486575L inclusive are guaranteed + to yield distinct internal states (this guarantee is specific to the + default Wichmann-Hill generator, and may not apply to subclasses + supplying their own basic generator). +\end{funcdesc} + +\begin{funcdesc}{whseed}{\optional{x}} + This is obsolete, supplied for bit-level compatibility with versions + of Python prior to 2.1. + See \function{seed} for details. \function{whseed} does not guarantee + that distinct integer arguments yield distinct internal states, and can + yield no more than about 2**24 distinct internal states in all. \end{funcdesc} \begin{funcdesc}{getstate}{} Return an object capturing the current internal state of the generator. This object can be passed to \code{setstate()} to restore the state. 
- \end{funcdesc} + \versionadded{2.1} +\end{funcdesc} \begin{funcdesc}{setstate}{state} \var{state} should have been obtained from a previous call to \code{getstate()}, and \code{setstate()} restores the internal state - of the generate to what it was at the time \code{setstate()} was called. + of the generator to what it was at the time \code{getstate()} was called. + \versionadded{2.1} \end{funcdesc} \begin{funcdesc}{jumpahead}{n} @@ -124,6 +138,7 @@ Bookkeeping functions: internal state, and then \method{jumpahead()} can be used to force the instances' states as far apart as you like (up to the period of the generator). + \versionadded{2.1} \end{funcdesc} Functions for integers: diff --git a/Lib/random.py b/Lib/random.py index b9359e48120..427b73179c6 100644 --- a/Lib/random.py +++ b/Lib/random.py @@ -66,10 +66,10 @@ used to "move backward in time": >>> g = Random(42) # arbitrary >>> g.random() -0.24855401895528142 +0.25420336316883324 >>> g.jumpahead(6953607871644L - 1) # move *back* one >>> g.random() -0.24855401895528142 +0.25420336316883324 """ # XXX The docstring sucks. @@ -119,26 +119,31 @@ class Random: # different core generator should override the seed(), random(), # getstate(), setstate() and jumpahead() methods. - def __whseed(self, x=0, y=0, z=0): - """Set the Wichmann-Hill seed from (x, y, z). + def seed(self, a=None): + """Initialize internal state from hashable object. - These must be integers in the range [0, 256). + None or no argument seeds from current time. + + If a is not None or an int or long, hash(a) is used instead. + + If a is an int or long, a is used directly. Distinct values between + 0 and 27814431486575L inclusive are guaranteed to yield distinct + internal states (this guarantee is specific to the default + Wichmann-Hill generator). 
""" - if not type(x) == type(y) == type(z) == type(0): - raise TypeError('seeds must be integers') - if not (0 <= x < 256 and 0 <= y < 256 and 0 <= z < 256): - raise ValueError('seeds must be in range(0, 256)') - if 0 == x == y == z: + if a is None: # Initialize from current time import time - t = long(time.time()) * 256 - t = int((t&0xffffff) ^ (t>>24)) - t, x = divmod(t, 256) - t, y = divmod(t, 256) - t, z = divmod(t, 256) - # Zero is a poor seed, so substitute 1 - self._seed = (x or 1, y or 1, z or 1) + a = long(time.time() * 256) + + if type(a) not in (type(3), type(3L)): + a = hash(a) + + a, x = divmod(a, 30268) + a, y = divmod(a, 30306) + a, z = divmod(a, 30322) + self._seed = int(x)+1, int(y)+1, int(z)+1 def random(self): """Get the next random number in the range [0.0, 1.0).""" @@ -171,26 +176,6 @@ class Random: # never return 0.0 (asserted by Tim; proof too long for a comment). return (x/30269.0 + y/30307.0 + z/30323.0) % 1.0 - def seed(self, a=None): - """Seed from hashable object's hash code. - - None or no argument seeds from current time. It is not guaranteed - that objects with distinct hash codes lead to distinct internal - states. - """ - - if a is None: - self.__whseed() - return - a = hash(a) - a, x = divmod(a, 256) - a, y = divmod(a, 256) - a, z = divmod(a, 256) - x = (x + a) % 256 or 1 - y = (y + a) % 256 or 1 - z = (z + a) % 256 or 1 - self.__whseed(x, y, z) - def getstate(self): """Return internal state; can be passed to setstate() later.""" return self.VERSION, self._seed, self.gauss_next @@ -227,6 +212,50 @@ class Random: z = int(z * pow(170, n, 30323)) % 30323 self._seed = x, y, z + def __whseed(self, x=0, y=0, z=0): + """Set the Wichmann-Hill seed from (x, y, z). + + These must be integers in the range [0, 256). 
+ """ + + if not type(x) == type(y) == type(z) == type(0): + raise TypeError('seeds must be integers') + if not (0 <= x < 256 and 0 <= y < 256 and 0 <= z < 256): + raise ValueError('seeds must be in range(0, 256)') + if 0 == x == y == z: + # Initialize from current time + import time + t = long(time.time() * 256) + t = int((t&0xffffff) ^ (t>>24)) + t, x = divmod(t, 256) + t, y = divmod(t, 256) + t, z = divmod(t, 256) + # Zero is a poor seed, so substitute 1 + self._seed = (x or 1, y or 1, z or 1) + + def whseed(self, a=None): + """Seed from hashable object's hash code. + + None or no argument seeds from current time. It is not guaranteed + that objects with distinct hash codes lead to distinct internal + states. + + This is obsolete, provided for compatibility with the seed routine + used prior to Python 2.1. Use the .seed() method instead. + """ + + if a is None: + self.__whseed() + return + a = hash(a) + a, x = divmod(a, 256) + a, y = divmod(a, 256) + a, z = divmod(a, 256) + x = (x + a) % 256 or 1 + y = (y + a) % 256 or 1 + z = (z + a) % 256 or 1 + self.__whseed(x, y, z) + ## ---- Methods below this point do not need to be overridden when ## ---- subclassing for the purpose of using a different core generator. @@ -623,6 +652,7 @@ weibullvariate = _inst.weibullvariate getstate = _inst.getstate setstate = _inst.setstate jumpahead = _inst.jumpahead +whseed = _inst.whseed if __name__ == '__main__': _test() diff --git a/Misc/NEWS b/Misc/NEWS index 5c633f04786..85d106feb57 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -32,6 +32,16 @@ Standard library each thread, then using .jumpahead() to force each instance to use a non-overlapping segment of the full period. +- random.py's seed() function is new. For bit-for-bit compatibility with + prior releases, use the whseed function instead. 
The new seed function + addresses two problems: (1) The old function couldn't produce more than + about 2**24 distinct internal states; the new one about 2**45 (the best + that can be done in the Wichmann-Hill generator). (2) The old function + sometimes produced identical internal states when passed distinct + integers, and there was no simple way to predict when that would happen; + the new one guarantees to produce distinct internal states for all + arguments in [0, 27814431486576L). + Windows changes - Build procedure: the zlib project is built in a different way that