Improve code organization for the random module (GH-21161)

This commit is contained in:
Raymond Hettinger 2020-06-25 17:03:50 -07:00 committed by GitHub
parent 4b85e60601
commit ef19bad7d6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -1,5 +1,9 @@
"""Random variable generators.
bytes
-----
uniform bytes (values between 0 and 255)
integers
--------
uniform within range
@ -37,6 +41,10 @@ General notes on the underlying Mersenne Twister core generator:
"""
# Translated by Guido van Rossum from C source provided by
# Adrian Baddeley. Adapted by Raymond Hettinger for use with
# the Mersenne Twister and os.urandom() core generators.
from warnings import warn as _warn
from math import log as _log, exp as _exp, pi as _pi, e as _e, ceil as _ceil
from math import sqrt as _sqrt, acos as _acos, cos as _cos, sin as _sin
@ -46,6 +54,7 @@ from _collections_abc import Set as _Set, Sequence as _Sequence
from itertools import accumulate as _accumulate, repeat as _repeat
from bisect import bisect as _bisect
import os as _os
import _random
try:
# hashlib is pretty heavy to load, try lean internal module first
@ -54,7 +63,6 @@ except ImportError:
# fallback to official implementation
from hashlib import sha512 as _sha512
__all__ = [
"Random",
"SystemRandom",
@ -89,13 +97,6 @@ BPF = 53 # Number of bits in a float
RECIP_BPF = 2 ** -BPF
# Translated by Guido van Rossum from C source provided by
# Adrian Baddeley. Adapted by Raymond Hettinger for use with
# the Mersenne Twister and os.urandom() core generators.
import _random
class Random(_random.Random):
"""Random number generator base class used by bound module functions.
@ -121,26 +122,6 @@ class Random(_random.Random):
self.seed(x)
self.gauss_next = None
def __init_subclass__(cls, /, **kwargs):
    """Pick the integer-generation strategy for a new subclass.

    The MRO of the subclass is scanned from most to least derived and
    the scan stops at the first class whose own namespace defines one
    of the following names:

    * ``_randbelow``  -- an explicit implementation exists; inherit it.
    * ``getrandbits`` -- bind ``_randbelow`` to the algorithm built on
      getrandbits().
    * ``random``      -- bind ``_randbelow`` to the algorithm that only
      relies on random().

    Keyword arguments from the class statement are accepted but unused.
    """
    for klass in cls.__mro__:
        namespace = klass.__dict__
        if '_randbelow' in namespace:
            # An explicit implementation wins; nothing to rebind.
            return
        if 'getrandbits' in namespace:
            cls._randbelow = cls._randbelow_with_getrandbits
            return
        if 'random' in namespace:
            cls._randbelow = cls._randbelow_without_getrandbits
            return
def seed(self, a=None, version=2):
"""Initialize internal state from a seed.
@ -210,14 +191,11 @@ class Random(_random.Random):
"Random.setstate() of version %s" %
(version, self.VERSION))
## ---- Methods below this point do not need to be overridden when
## ---- subclassing for the purpose of using a different core generator.
## -------------------- bytes methods ---------------------
## -------------------------------------------------------
## ---- Methods below this point do not need to be overridden or extended
## ---- when subclassing for the purpose of using a different core generator.
def randbytes(self, n):
    """Generate n random bytes."""
    # Draw 8 bits per requested byte, then serialize the integer.
    bits = self.getrandbits(n * 8)
    return bits.to_bytes(n, 'little')
## -------------------- pickle support -------------------
@ -233,6 +211,80 @@ class Random(_random.Random):
def __reduce__(self):
    """Pickle support: return (class, no constructor args, state)."""
    factory = self.__class__
    state = self.getstate()
    return factory, (), state
## ---- internal support method for evenly distributed integers ----
def __init_subclass__(cls, /, **kwargs):
    """Pick the integer-generation strategy for a new subclass.

    The MRO of the subclass is scanned from most to least derived and
    the scan stops at the first class whose own namespace defines one
    of the following names:

    * ``_randbelow``  -- an explicit implementation exists; inherit it.
    * ``getrandbits`` -- bind ``_randbelow`` to the algorithm built on
      getrandbits().
    * ``random``      -- bind ``_randbelow`` to the algorithm that only
      relies on random().

    Keyword arguments from the class statement are accepted but unused.
    """
    for klass in cls.__mro__:
        namespace = klass.__dict__
        if '_randbelow' in namespace:
            # An explicit implementation wins; nothing to rebind.
            return
        if 'getrandbits' in namespace:
            cls._randbelow = cls._randbelow_with_getrandbits
            return
        if 'random' in namespace:
            cls._randbelow = cls._randbelow_without_getrandbits
            return
def _randbelow_with_getrandbits(self, n):
    "Return a random int in the range [0,n).  Returns 0 if n==0."
    if not n:
        return 0
    draw = self.getrandbits
    # bit_length of n itself (not n-1) so that n == 1 still asks for 1 bit.
    nbits = n.bit_length()
    # Rejection sampling: each draw satisfies 0 <= candidate < 2**nbits;
    # keep drawing until one lands below n.
    while True:
        candidate = draw(nbits)
        if candidate < n:
            return candidate
def _randbelow_without_getrandbits(self, n, maxsize=1<<BPF):
    """Return a random int in the range [0,n).  Returns 0 if n==0.

    Only random() is consulted; getrandbits() is never called.  When n
    is at least maxsize a warning is issued and the result is computed
    by scaling a single random() value.
    """
    draw = self.random
    if n >= maxsize:
        _warn("Underlying random() generator does not supply \n"
            "enough bits to choose from a population range this large.\n"
            "To remove the range limitation, add a getrandbits() method.")
        return _floor(draw() * n)
    if n == 0:
        return 0
    # Discard draws at or above the largest multiple of n that fits in
    # maxsize, so that int(cutoff * maxsize) % n == 0.
    cutoff = (maxsize - maxsize % n) / maxsize
    sample = draw()
    while sample >= cutoff:
        sample = draw()
    return _floor(sample * maxsize) % n
_randbelow = _randbelow_with_getrandbits
## --------------------------------------------------------
## ---- Methods below this point generate custom distributions
## ---- based on the methods defined above. They do not
## ---- directly touch the underlying generator and only
## ---- access randomness through the methods: random(),
## ---- getrandbits(), or _randbelow().
## -------------------- bytes methods ---------------------
def randbytes(self, n):
    """Generate n random bytes."""
    # Draw 8 bits per requested byte, then serialize the integer.
    bits = self.getrandbits(n * 8)
    return bits.to_bytes(n, 'little')
## -------------------- integer methods -------------------
def randrange(self, start, stop=None, step=1):
@ -285,40 +337,6 @@ class Random(_random.Random):
return self.randrange(a, b+1)
def _randbelow_with_getrandbits(self, n):
    "Return a random int in the range [0,n).  Returns 0 if n==0."
    if not n:
        return 0
    draw = self.getrandbits
    # bit_length of n itself (not n-1) so that n == 1 still asks for 1 bit.
    nbits = n.bit_length()
    # Rejection sampling: each draw satisfies 0 <= candidate < 2**nbits;
    # keep drawing until one lands below n.
    while True:
        candidate = draw(nbits)
        if candidate < n:
            return candidate
def _randbelow_without_getrandbits(self, n, maxsize=1<<BPF):
    """Return a random int in the range [0,n).  Returns 0 if n==0.

    Only random() is consulted; getrandbits() is never called.  When n
    is at least maxsize a warning is issued and the result is computed
    by scaling a single random() value.
    """
    draw = self.random
    if n >= maxsize:
        _warn("Underlying random() generator does not supply \n"
            "enough bits to choose from a population range this large.\n"
            "To remove the range limitation, add a getrandbits() method.")
        return _floor(draw() * n)
    if n == 0:
        return 0
    # Discard draws at or above the largest multiple of n that fits in
    # maxsize, so that int(cutoff * maxsize) % n == 0.
    cutoff = (maxsize - maxsize % n) / maxsize
    sample = draw()
    while sample >= cutoff:
        sample = draw()
    return _floor(sample * maxsize) % n
_randbelow = _randbelow_with_getrandbits
## -------------------- sequence methods -------------------
@ -479,16 +497,13 @@ class Random(_random.Random):
return [population[bisect(cum_weights, random() * total, 0, hi)]
for i in _repeat(None, k)]
## -------------------- real-valued distributions -------------------
## -------------------- uniform distribution -------------------
## -------------------- real-valued distributions -------------------
def uniform(self, a, b):
    "Get a random number in the range [a, b) or [a, b] depending on rounding."
    # Scale one random() draw by the width of the interval and shift by a.
    span = b - a
    return a + span * self.random()
## -------------------- triangular --------------------
def triangular(self, low=0.0, high=1.0, mode=None):
"""Triangular distribution.
@ -509,16 +524,12 @@ class Random(_random.Random):
low, high = high, low
return low + (high - low) * _sqrt(u * c)
## -------------------- normal distribution --------------------
def normalvariate(self, mu, sigma):
"""Normal distribution.
mu is the mean, and sigma is the standard deviation.
"""
# mu = mean, sigma = standard deviation
# Uses Kinderman and Monahan method. Reference: Kinderman,
# A.J. and Monahan, J.F., "Computer generation of random
# variables using the ratio of uniform deviates", ACM Trans
@ -534,7 +545,43 @@ class Random(_random.Random):
break
return mu + z * sigma
## -------------------- lognormal distribution --------------------
def gauss(self, mu=0.0, sigma=1.0):
    """Gaussian distribution.

    mu is the mean, and sigma is the standard deviation.  Both are
    optional; omitting them samples the standard normal distribution
    (mu=0.0, sigma=1.0).  This is slightly faster than the
    normalvariate() function.

    Not thread-safe without a lock around calls.

    """
    # When x and y are two variables from [0, 1), uniformly
    # distributed, then
    #
    #    cos(2*pi*x)*sqrt(-2*log(1-y))
    #    sin(2*pi*x)*sqrt(-2*log(1-y))
    #
    # are two *independent* variables with normal distribution
    # (mu = 0, sigma = 1).
    # (Lambert Meertens)
    # (corrected version; bug discovered by Mike Miller, fixed by LM)

    # Multithreading note: When two threads call this function
    # simultaneously, it is possible that they will receive the
    # same return value.  The window is very small though.  To
    # avoid this, you have to use a lock around all calls.  (I
    # didn't want to slow this down in the serial case by using a
    # lock here.)

    random = self.random
    # Each pair of random() draws yields two variates: one is returned
    # now and the other is parked in self.gauss_next for the next call.
    z = self.gauss_next
    self.gauss_next = None
    if z is None:
        x2pi = random() * TWOPI
        g2rad = _sqrt(-2.0 * _log(1.0 - random()))
        z = _cos(x2pi) * g2rad
        self.gauss_next = _sin(x2pi) * g2rad

    return mu + z * sigma
def lognormvariate(self, mu, sigma):
"""Log normal distribution.
@ -546,8 +593,6 @@ class Random(_random.Random):
"""
return _exp(self.normalvariate(mu, sigma))
## -------------------- exponential distribution --------------------
def expovariate(self, lambd):
"""Exponential distribution.
@ -565,8 +610,6 @@ class Random(_random.Random):
# possibility of taking the log of zero.
return -_log(1.0 - self.random()) / lambd
## -------------------- von Mises distribution --------------------
def vonmisesvariate(self, mu, kappa):
"""Circular data distribution.
@ -576,10 +619,6 @@ class Random(_random.Random):
to a uniform random angle over the range 0 to 2*pi.
"""
# mu: mean angle (in radians between 0 and 2*pi)
# kappa: concentration parameter kappa (>= 0)
# if kappa = 0 generate uniform random angle
# Based upon an algorithm published in: Fisher, N.I.,
# "Statistical Analysis of Circular Data", Cambridge
# University Press, 1993.
@ -613,8 +652,6 @@ class Random(_random.Random):
return theta
## -------------------- gamma distribution --------------------
def gammavariate(self, alpha, beta):
"""Gamma distribution. Not the gamma function!
@ -627,7 +664,6 @@ class Random(_random.Random):
math.gamma(alpha) * beta ** alpha
"""
# alpha > 0, beta > 0, mean is alpha*beta, variance is alpha*beta**2
# Warning: a few older sources define the gamma distribution in terms
@ -681,61 +717,6 @@ class Random(_random.Random):
break
return x * beta
## -------------------- Gauss (faster alternative) --------------------
def gauss(self, mu=0.0, sigma=1.0):
    """Gaussian distribution.

    mu is the mean, and sigma is the standard deviation.  Both are
    optional; omitting them samples the standard normal distribution
    (mu=0.0, sigma=1.0).  This is slightly faster than the
    normalvariate() function.

    Not thread-safe without a lock around calls.

    """
    # When x and y are two variables from [0, 1), uniformly
    # distributed, then
    #
    #    cos(2*pi*x)*sqrt(-2*log(1-y))
    #    sin(2*pi*x)*sqrt(-2*log(1-y))
    #
    # are two *independent* variables with normal distribution
    # (mu = 0, sigma = 1).
    # (Lambert Meertens)
    # (corrected version; bug discovered by Mike Miller, fixed by LM)

    # Multithreading note: When two threads call this function
    # simultaneously, it is possible that they will receive the
    # same return value.  The window is very small though.  To
    # avoid this, you have to use a lock around all calls.  (I
    # didn't want to slow this down in the serial case by using a
    # lock here.)

    random = self.random
    # Each pair of random() draws yields two variates: one is returned
    # now and the other is parked in self.gauss_next for the next call.
    z = self.gauss_next
    self.gauss_next = None
    if z is None:
        x2pi = random() * TWOPI
        g2rad = _sqrt(-2.0 * _log(1.0 - random()))
        z = _cos(x2pi) * g2rad
        self.gauss_next = _sin(x2pi) * g2rad

    return mu + z * sigma
## -------------------- beta --------------------
## See
## http://mail.python.org/pipermail/python-bugs-list/2001-January/003752.html
## for Ivan Frohne's insightful analysis of why the original implementation:
##
## def betavariate(self, alpha, beta):
## # Discrete Event Simulation in C, pp 87-88.
##
## y = self.expovariate(alpha)
## z = self.expovariate(1.0/beta)
## return z/(y+z)
##
## was dead wrong, and how it probably got that way.
def betavariate(self, alpha, beta):
"""Beta distribution.
@ -743,6 +724,18 @@ class Random(_random.Random):
Returned values range between 0 and 1.
"""
## See
## http://mail.python.org/pipermail/python-bugs-list/2001-January/003752.html
## for Ivan Frohne's insightful analysis of why the original implementation:
##
## def betavariate(self, alpha, beta):
## # Discrete Event Simulation in C, pp 87-88.
##
## y = self.expovariate(alpha)
## z = self.expovariate(1.0/beta)
## return z/(y+z)
##
## was dead wrong, and how it probably got that way.
# This version due to Janne Sinkkonen, and matches all the std
# texts (e.g., Knuth Vol 2 Ed 3 pg 134 "the beta distribution").
@ -751,8 +744,6 @@ class Random(_random.Random):
return y / (y + self.gammavariate(beta, 1.0))
return 0.0
## -------------------- Pareto --------------------
def paretovariate(self, alpha):
"""Pareto distribution. alpha is the shape parameter."""
# Jain, pg. 495
@ -760,8 +751,6 @@ class Random(_random.Random):
u = 1.0 - self.random()
return 1.0 / u ** (1.0 / alpha)
## -------------------- Weibull --------------------
def weibullvariate(self, alpha, beta):
"""Weibull distribution.
@ -774,14 +763,17 @@ class Random(_random.Random):
return alpha * (-_log(u)) ** (1.0 / beta)
## ------------------------------------------------------------------
## --------------- Operating System Random Source ------------------
class SystemRandom(Random):
"""Alternate random number generator using sources provided
by the operating system (such as /dev/urandom on Unix or
CryptGenRandom on Windows).
Not available on all systems (see os.urandom() for details).
"""
def random(self):
@ -812,7 +804,41 @@ class SystemRandom(Random):
getstate = setstate = _notimplemented
## -------------------- test program --------------------
# ----------------------------------------------------------------------
# One shared Random instance, created with no explicit seed, backs the
# module-level API.  Its bound methods are re-exported below as plain
# functions, so every caller (user code and library code alike) shares
# a single generator state.  That is fine for most programs and spares
# casual users from having to instantiate Random() themselves.

_inst = Random()

# Generator state management.
seed = _inst.seed
getstate = _inst.getstate
setstate = _inst.setstate

# Core sources of randomness.
random = _inst.random
getrandbits = _inst.getrandbits
randbytes = _inst.randbytes

# Integers.
randrange = _inst.randrange
randint = _inst.randint

# Sequences.
choice = _inst.choice
choices = _inst.choices
sample = _inst.sample
shuffle = _inst.shuffle

# Real-valued distributions.
uniform = _inst.uniform
triangular = _inst.triangular
normalvariate = _inst.normalvariate
gauss = _inst.gauss
lognormvariate = _inst.lognormvariate
expovariate = _inst.expovariate
vonmisesvariate = _inst.vonmisesvariate
gammavariate = _inst.gammavariate
betavariate = _inst.betavariate
paretovariate = _inst.paretovariate
weibullvariate = _inst.weibullvariate
## ------------------------------------------------------
## ----------------- test program -----------------------
def _test_generator(n, func, args):
from statistics import stdev, fmean as mean
@ -849,36 +875,9 @@ def _test(N=2000):
_test_generator(N, betavariate, (3.0, 3.0))
_test_generator(N, triangular, (0.0, 1.0, 1.0 / 3.0))
# One shared Random instance, created with no explicit seed, backs the
# module-level API.  Its bound methods are re-exported below as plain
# functions, so every caller (user code and library code alike) shares
# a single generator state.  That is fine for most programs and spares
# casual users from having to instantiate Random() themselves.

_inst = Random()

# Generator state management.
seed = _inst.seed
getstate = _inst.getstate
setstate = _inst.setstate

# Core sources of randomness.
random = _inst.random
getrandbits = _inst.getrandbits
randbytes = _inst.randbytes

# Integers.
randrange = _inst.randrange
randint = _inst.randint

# Sequences.
choice = _inst.choice
choices = _inst.choices
sample = _inst.sample
shuffle = _inst.shuffle

# Real-valued distributions.
uniform = _inst.uniform
triangular = _inst.triangular
normalvariate = _inst.normalvariate
gauss = _inst.gauss
lognormvariate = _inst.lognormvariate
expovariate = _inst.expovariate
vonmisesvariate = _inst.vonmisesvariate
gammavariate = _inst.gammavariate
betavariate = _inst.betavariate
paretovariate = _inst.paretovariate
weibullvariate = _inst.weibullvariate
## ------------------------------------------------------
## ------------------ fork support ---------------------
if hasattr(_os, "fork"):
    # Re-run seed() on the shared instance in every forked child so the
    # child does not continue from the parent's generator state.
    _os.register_at_fork(after_in_child=_inst.seed)