Merged revisions 78818 via svnmerge from
[python/dscho.git] / Lib / random.py
blob592e4b899a17b9d218ec0d9f058785951389aeb7
1 """Random variable generators.
3 integers
4 --------
5 uniform within range
7 sequences
8 ---------
9 pick random element
10 pick random sample
11 generate random permutation
13 distributions on the real line:
14 ------------------------------
15 uniform
16 triangular
17 normal (Gaussian)
18 lognormal
19 negative exponential
20 gamma
21 beta
22 pareto
23 Weibull
25 distributions on the circle (angles 0 to 2pi)
26 ---------------------------------------------
27 circular uniform
28 von Mises
30 General notes on the underlying Mersenne Twister core generator:
32 * The period is 2**19937-1.
33 * It is one of the most extensively tested generators in existence.
34 * The random() method is implemented in C, executes in a single Python step,
35 and is, therefore, threadsafe.
37 """
39 from __future__ import division
40 from warnings import warn as _warn
41 from types import MethodType as _MethodType, BuiltinMethodType as _BuiltinMethodType
42 from math import log as _log, exp as _exp, pi as _pi, e as _e, ceil as _ceil
43 from math import sqrt as _sqrt, acos as _acos, cos as _cos, sin as _sin
44 from os import urandom as _urandom
45 from binascii import hexlify as _hexlify
46 import collections as _collections
48 __all__ = ["Random","seed","random","uniform","randint","choice","sample",
49 "randrange","shuffle","normalvariate","lognormvariate",
50 "expovariate","vonmisesvariate","gammavariate","triangular",
51 "gauss","betavariate","paretovariate","weibullvariate",
52 "getstate","setstate", "getrandbits",
53 "SystemRandom"]
55 NV_MAGICCONST = 4 * _exp(-0.5)/_sqrt(2.0)
56 TWOPI = 2.0*_pi
57 LOG4 = _log(4.0)
58 SG_MAGICCONST = 1.0 + _log(4.5)
59 BPF = 53 # Number of bits in a float
60 RECIP_BPF = 2**-BPF
63 # Translated by Guido van Rossum from C source provided by
64 # Adrian Baddeley. Adapted by Raymond Hettinger for use with
65 # the Mersenne Twister and os.urandom() core generators.
67 import _random
69 class Random(_random.Random):
70 """Random number generator base class used by bound module functions.
72 Used to instantiate instances of Random to get generators that don't
73 share state.
75 Class Random can also be subclassed if you want to use a different basic
76 generator of your own devising: in that case, override the following
77 methods: random(), seed(), getstate(), and setstate().
78 Optionally, implement a getrandbits() method so that randrange()
79 can cover arbitrarily large ranges.
81 """
83 VERSION = 3 # used by getstate/setstate
85 def __init__(self, x=None):
86 """Initialize an instance.
88 Optional argument x controls seeding, as for Random.seed().
89 """
91 self.seed(x)
92 self.gauss_next = None
94 def seed(self, a=None):
95 """Initialize internal state from hashable object.
97 None or no argument seeds from current time or from an operating
98 system specific randomness source if available.
100 If a is not None or an int, hash(a) is used instead.
103 if a is None:
104 try:
105 a = int(_hexlify(_urandom(16)), 16)
106 except NotImplementedError:
107 import time
108 a = int(time.time() * 256) # use fractional seconds
110 super().seed(a)
111 self.gauss_next = None
113 def getstate(self):
114 """Return internal state; can be passed to setstate() later."""
115 return self.VERSION, super().getstate(), self.gauss_next
117 def setstate(self, state):
118 """Restore internal state from object returned by getstate()."""
119 version = state[0]
120 if version == 3:
121 version, internalstate, self.gauss_next = state
122 super().setstate(internalstate)
123 elif version == 2:
124 version, internalstate, self.gauss_next = state
125 # In version 2, the state was saved as signed ints, which causes
126 # inconsistencies between 32/64-bit systems. The state is
127 # really unsigned 32-bit ints, so we convert negative ints from
128 # version 2 to positive longs for version 3.
129 try:
130 internalstate = tuple( x % (2**32) for x in internalstate )
131 except ValueError as e:
132 raise TypeError from e
133 super(Random, self).setstate(internalstate)
134 else:
135 raise ValueError("state with version %s passed to "
136 "Random.setstate() of version %s" %
137 (version, self.VERSION))
139 ## ---- Methods below this point do not need to be overridden when
140 ## ---- subclassing for the purpose of using a different core generator.
142 ## -------------------- pickle support -------------------
144 def __getstate__(self): # for pickle
145 return self.getstate()
147 def __setstate__(self, state): # for pickle
148 self.setstate(state)
150 def __reduce__(self):
151 return self.__class__, (), self.getstate()
153 ## -------------------- integer methods -------------------
155 def randrange(self, start, stop=None, step=1, int=int, default=None,
156 maxwidth=1<<BPF):
157 """Choose a random item from range(start, stop[, step]).
159 This fixes the problem with randint() which includes the
160 endpoint; in Python this is usually not what you want.
161 Do not supply the 'int', 'default', and 'maxwidth' arguments.
164 # This code is a bit messy to make it fast for the
165 # common case while still doing adequate error checking.
166 istart = int(start)
167 if istart != start:
168 raise ValueError("non-integer arg 1 for randrange()")
169 if stop is default:
170 if istart > 0:
171 if istart >= maxwidth:
172 return self._randbelow(istart)
173 return int(self.random() * istart)
174 raise ValueError("empty range for randrange()")
176 # stop argument supplied.
177 istop = int(stop)
178 if istop != stop:
179 raise ValueError("non-integer stop for randrange()")
180 width = istop - istart
181 if step == 1 and width > 0:
182 # Note that
183 # int(istart + self.random()*width)
184 # instead would be incorrect. For example, consider istart
185 # = -2 and istop = 0. Then the guts would be in
186 # -2.0 to 0.0 exclusive on both ends (ignoring that random()
187 # might return 0.0), and because int() truncates toward 0, the
188 # final result would be -1 or 0 (instead of -2 or -1).
189 # istart + int(self.random()*width)
190 # would also be incorrect, for a subtler reason: the RHS
191 # can return a long, and then randrange() would also return
192 # a long, but we're supposed to return an int (for backward
193 # compatibility).
195 if width >= maxwidth:
196 return int(istart + self._randbelow(width))
197 return int(istart + int(self.random()*width))
198 if step == 1:
199 raise ValueError("empty range for randrange() (%d,%d, %d)" % (istart, istop, width))
201 # Non-unit step argument supplied.
202 istep = int(step)
203 if istep != step:
204 raise ValueError("non-integer step for randrange()")
205 if istep > 0:
206 n = (width + istep - 1) // istep
207 elif istep < 0:
208 n = (width + istep + 1) // istep
209 else:
210 raise ValueError("zero step for randrange()")
212 if n <= 0:
213 raise ValueError("empty range for randrange()")
215 if n >= maxwidth:
216 return istart + istep*self._randbelow(n)
217 return istart + istep*int(self.random() * n)
219 def randint(self, a, b):
220 """Return random integer in range [a, b], including both end points.
223 return self.randrange(a, b+1)
225 def _randbelow(self, n, _log=_log, int=int, _maxwidth=1<<BPF,
226 _Method=_MethodType, _BuiltinMethod=_BuiltinMethodType):
227 """Return a random int in the range [0,n)
229 Handles the case where n has more bits than returned
230 by a single call to the underlying generator.
233 try:
234 getrandbits = self.getrandbits
235 except AttributeError:
236 pass
237 else:
238 # Only call self.getrandbits if the original random() builtin method
239 # has not been overridden or if a new getrandbits() was supplied.
240 # This assures that the two methods correspond.
241 if type(self.random) is _BuiltinMethod or type(getrandbits) is _Method:
242 k = int(1.00001 + _log(n-1, 2.0)) # 2**k > n-1 > 2**(k-2)
243 r = getrandbits(k)
244 while r >= n:
245 r = getrandbits(k)
246 return r
247 if n >= _maxwidth:
248 _warn("Underlying random() generator does not supply \n"
249 "enough bits to choose from a population range this large")
250 return int(self.random() * n)
252 ## -------------------- sequence methods -------------------
254 def choice(self, seq):
255 """Choose a random element from a non-empty sequence."""
256 return seq[int(self.random() * len(seq))] # raises IndexError if seq is empty
258 def shuffle(self, x, random=None, int=int):
259 """x, random=random.random -> shuffle list x in place; return None.
261 Optional arg random is a 0-argument function returning a random
262 float in [0.0, 1.0); by default, the standard random.random.
265 if random is None:
266 random = self.random
267 for i in reversed(range(1, len(x))):
268 # pick an element in x[:i+1] with which to exchange x[i]
269 j = int(random() * (i+1))
270 x[i], x[j] = x[j], x[i]
272 def sample(self, population, k):
273 """Chooses k unique random elements from a population sequence or set.
275 Returns a new list containing elements from the population while
276 leaving the original population unchanged. The resulting list is
277 in selection order so that all sub-slices will also be valid random
278 samples. This allows raffle winners (the sample) to be partitioned
279 into grand prize and second place winners (the subslices).
281 Members of the population need not be hashable or unique. If the
282 population contains repeats, then each occurrence is a possible
283 selection in the sample.
285 To choose a sample in a range of integers, use range as an argument.
286 This is especially fast and space efficient for sampling from a
287 large population: sample(range(10000000), 60)
290 # Sampling without replacement entails tracking either potential
291 # selections (the pool) in a list or previous selections in a set.
293 # When the number of selections is small compared to the
294 # population, then tracking selections is efficient, requiring
295 # only a small set and an occasional reselection. For
296 # a larger number of selections, the pool tracking method is
297 # preferred since the list takes less space than the
298 # set and it doesn't suffer from frequent reselections.
300 if isinstance(population, _collections.Set):
301 population = tuple(population)
302 if not isinstance(population, _collections.Sequence):
303 raise TypeError("Population must be a sequence or Set. For dicts, use list(d).")
304 random = self.random
305 n = len(population)
306 if not 0 <= k <= n:
307 raise ValueError("Sample larger than population")
308 _int = int
309 result = [None] * k
310 setsize = 21 # size of a small set minus size of an empty list
311 if k > 5:
312 setsize += 4 ** _ceil(_log(k * 3, 4)) # table size for big sets
313 if n <= setsize:
314 # An n-length list is smaller than a k-length set
315 pool = list(population)
316 for i in range(k): # invariant: non-selected at [0,n-i)
317 j = _int(random() * (n-i))
318 result[i] = pool[j]
319 pool[j] = pool[n-i-1] # move non-selected item into vacancy
320 else:
321 selected = set()
322 selected_add = selected.add
323 for i in range(k):
324 j = _int(random() * n)
325 while j in selected:
326 j = _int(random() * n)
327 selected_add(j)
328 result[i] = population[j]
329 return result
331 ## -------------------- real-valued distributions -------------------
333 ## -------------------- uniform distribution -------------------
335 def uniform(self, a, b):
336 "Get a random number in the range [a, b) or [a, b] depending on rounding."
337 return a + (b-a) * self.random()
339 ## -------------------- triangular --------------------
341 def triangular(self, low=0.0, high=1.0, mode=None):
342 """Triangular distribution.
344 Continuous distribution bounded by given lower and upper limits,
345 and having a given mode value in-between.
347 http://en.wikipedia.org/wiki/Triangular_distribution
350 u = self.random()
351 c = 0.5 if mode is None else (mode - low) / (high - low)
352 if u > c:
353 u = 1.0 - u
354 c = 1.0 - c
355 low, high = high, low
356 return low + (high - low) * (u * c) ** 0.5
358 ## -------------------- normal distribution --------------------
360 def normalvariate(self, mu, sigma):
361 """Normal distribution.
363 mu is the mean, and sigma is the standard deviation.
366 # mu = mean, sigma = standard deviation
368 # Uses Kinderman and Monahan method. Reference: Kinderman,
369 # A.J. and Monahan, J.F., "Computer generation of random
370 # variables using the ratio of uniform deviates", ACM Trans
371 # Math Software, 3, (1977), pp257-260.
373 random = self.random
374 while 1:
375 u1 = random()
376 u2 = 1.0 - random()
377 z = NV_MAGICCONST*(u1-0.5)/u2
378 zz = z*z/4.0
379 if zz <= -_log(u2):
380 break
381 return mu + z*sigma
383 ## -------------------- lognormal distribution --------------------
385 def lognormvariate(self, mu, sigma):
386 """Log normal distribution.
388 If you take the natural logarithm of this distribution, you'll get a
389 normal distribution with mean mu and standard deviation sigma.
390 mu can have any value, and sigma must be greater than zero.
393 return _exp(self.normalvariate(mu, sigma))
395 ## -------------------- exponential distribution --------------------
397 def expovariate(self, lambd):
398 """Exponential distribution.
400 lambd is 1.0 divided by the desired mean. It should be
401 nonzero. (The parameter would be called "lambda", but that is
402 a reserved word in Python.) Returned values range from 0 to
403 positive infinity if lambd is positive, and from negative
404 infinity to 0 if lambd is negative.
407 # lambd: rate lambd = 1/mean
408 # ('lambda' is a Python reserved word)
410 random = self.random
411 u = random()
412 while u <= 1e-7:
413 u = random()
414 return -_log(u)/lambd
416 ## -------------------- von Mises distribution --------------------
418 def vonmisesvariate(self, mu, kappa):
419 """Circular data distribution.
421 mu is the mean angle, expressed in radians between 0 and 2*pi, and
422 kappa is the concentration parameter, which must be greater than or
423 equal to zero. If kappa is equal to zero, this distribution reduces
424 to a uniform random angle over the range 0 to 2*pi.
427 # mu: mean angle (in radians between 0 and 2*pi)
428 # kappa: concentration parameter kappa (>= 0)
429 # if kappa = 0 generate uniform random angle
431 # Based upon an algorithm published in: Fisher, N.I.,
432 # "Statistical Analysis of Circular Data", Cambridge
433 # University Press, 1993.
435 # Thanks to Magnus Kessler for a correction to the
436 # implementation of step 4.
438 random = self.random
439 if kappa <= 1e-6:
440 return TWOPI * random()
442 a = 1.0 + _sqrt(1.0 + 4.0 * kappa * kappa)
443 b = (a - _sqrt(2.0 * a))/(2.0 * kappa)
444 r = (1.0 + b * b)/(2.0 * b)
446 while 1:
447 u1 = random()
449 z = _cos(_pi * u1)
450 f = (1.0 + r * z)/(r + z)
451 c = kappa * (r - f)
453 u2 = random()
455 if u2 < c * (2.0 - c) or u2 <= c * _exp(1.0 - c):
456 break
458 u3 = random()
459 if u3 > 0.5:
460 theta = (mu % TWOPI) + _acos(f)
461 else:
462 theta = (mu % TWOPI) - _acos(f)
464 return theta
466 ## -------------------- gamma distribution --------------------
468 def gammavariate(self, alpha, beta):
469 """Gamma distribution. Not the gamma function!
471 Conditions on the parameters are alpha > 0 and beta > 0.
475 # alpha > 0, beta > 0, mean is alpha*beta, variance is alpha*beta**2
477 # Warning: a few older sources define the gamma distribution in terms
478 # of alpha > -1.0
479 if alpha <= 0.0 or beta <= 0.0:
480 raise ValueError('gammavariate: alpha and beta must be > 0.0')
482 random = self.random
483 if alpha > 1.0:
485 # Uses R.C.H. Cheng, "The generation of Gamma
486 # variables with non-integral shape parameters",
487 # Applied Statistics, (1977), 26, No. 1, p71-74
489 ainv = _sqrt(2.0 * alpha - 1.0)
490 bbb = alpha - LOG4
491 ccc = alpha + ainv
493 while 1:
494 u1 = random()
495 if not 1e-7 < u1 < .9999999:
496 continue
497 u2 = 1.0 - random()
498 v = _log(u1/(1.0-u1))/ainv
499 x = alpha*_exp(v)
500 z = u1*u1*u2
501 r = bbb+ccc*v-x
502 if r + SG_MAGICCONST - 4.5*z >= 0.0 or r >= _log(z):
503 return x * beta
505 elif alpha == 1.0:
506 # expovariate(1)
507 u = random()
508 while u <= 1e-7:
509 u = random()
510 return -_log(u) * beta
512 else: # alpha is between 0 and 1 (exclusive)
514 # Uses ALGORITHM GS of Statistical Computing - Kennedy & Gentle
516 while 1:
517 u = random()
518 b = (_e + alpha)/_e
519 p = b*u
520 if p <= 1.0:
521 x = p ** (1.0/alpha)
522 else:
523 x = -_log((b-p)/alpha)
524 u1 = random()
525 if p > 1.0:
526 if u1 <= x ** (alpha - 1.0):
527 break
528 elif u1 <= _exp(-x):
529 break
530 return x * beta
532 ## -------------------- Gauss (faster alternative) --------------------
534 def gauss(self, mu, sigma):
535 """Gaussian distribution.
537 mu is the mean, and sigma is the standard deviation. This is
538 slightly faster than the normalvariate() function.
540 Not thread-safe without a lock around calls.
544 # When x and y are two variables from [0, 1), uniformly
545 # distributed, then
547 # cos(2*pi*x)*sqrt(-2*log(1-y))
548 # sin(2*pi*x)*sqrt(-2*log(1-y))
550 # are two *independent* variables with normal distribution
551 # (mu = 0, sigma = 1).
552 # (Lambert Meertens)
553 # (corrected version; bug discovered by Mike Miller, fixed by LM)
555 # Multithreading note: When two threads call this function
556 # simultaneously, it is possible that they will receive the
557 # same return value. The window is very small though. To
558 # avoid this, you have to use a lock around all calls. (I
559 # didn't want to slow this down in the serial case by using a
560 # lock here.)
562 random = self.random
563 z = self.gauss_next
564 self.gauss_next = None
565 if z is None:
566 x2pi = random() * TWOPI
567 g2rad = _sqrt(-2.0 * _log(1.0 - random()))
568 z = _cos(x2pi) * g2rad
569 self.gauss_next = _sin(x2pi) * g2rad
571 return mu + z*sigma
573 ## -------------------- beta --------------------
574 ## See
575 ## http://sourceforge.net/bugs/?func=detailbug&bug_id=130030&group_id=5470
576 ## for Ivan Frohne's insightful analysis of why the original implementation:
578 ## def betavariate(self, alpha, beta):
579 ## # Discrete Event Simulation in C, pp 87-88.
581 ## y = self.expovariate(alpha)
582 ## z = self.expovariate(1.0/beta)
583 ## return z/(y+z)
585 ## was dead wrong, and how it probably got that way.
587 def betavariate(self, alpha, beta):
588 """Beta distribution.
590 Conditions on the parameters are alpha > 0 and beta > 0.
591 Returned values range between 0 and 1.
595 # This version due to Janne Sinkkonen, and matches all the std
596 # texts (e.g., Knuth Vol 2 Ed 3 pg 134 "the beta distribution").
597 y = self.gammavariate(alpha, 1.)
598 if y == 0:
599 return 0.0
600 else:
601 return y / (y + self.gammavariate(beta, 1.))
603 ## -------------------- Pareto --------------------
605 def paretovariate(self, alpha):
606 """Pareto distribution. alpha is the shape parameter."""
607 # Jain, pg. 495
609 u = 1.0 - self.random()
610 return 1.0 / pow(u, 1.0/alpha)
612 ## -------------------- Weibull --------------------
614 def weibullvariate(self, alpha, beta):
615 """Weibull distribution.
617 alpha is the scale parameter and beta is the shape parameter.
620 # Jain, pg. 499; bug fix courtesy Bill Arms
622 u = 1.0 - self.random()
623 return alpha * pow(-_log(u), 1.0/beta)
625 ## --------------- Operating System Random Source ------------------
627 class SystemRandom(Random):
628 """Alternate random number generator using sources provided
629 by the operating system (such as /dev/urandom on Unix or
630 CryptGenRandom on Windows).
632 Not available on all systems (see os.urandom() for details).
635 def random(self):
636 """Get the next random number in the range [0.0, 1.0)."""
637 return (int(_hexlify(_urandom(7)), 16) >> 3) * RECIP_BPF
639 def getrandbits(self, k):
640 """getrandbits(k) -> x. Generates a long int with k random bits."""
641 if k <= 0:
642 raise ValueError('number of bits must be greater than zero')
643 if k != int(k):
644 raise TypeError('number of bits should be an integer')
645 bytes = (k + 7) // 8 # bits / 8 and rounded up
646 x = int(_hexlify(_urandom(bytes)), 16)
647 return x >> (bytes * 8 - k) # trim excess bits
649 def seed(self, *args, **kwds):
650 "Stub method. Not used for a system random number generator."
651 return None
653 def _notimplemented(self, *args, **kwds):
654 "Method should not be called for a system random number generator."
655 raise NotImplementedError('System entropy source does not have state.')
656 getstate = setstate = _notimplemented
658 ## -------------------- test program --------------------
660 def _test_generator(n, func, args):
661 import time
662 print(n, 'times', func.__name__)
663 total = 0.0
664 sqsum = 0.0
665 smallest = 1e10
666 largest = -1e10
667 t0 = time.time()
668 for i in range(n):
669 x = func(*args)
670 total += x
671 sqsum = sqsum + x*x
672 smallest = min(x, smallest)
673 largest = max(x, largest)
674 t1 = time.time()
675 print(round(t1-t0, 3), 'sec,', end=' ')
676 avg = total/n
677 stddev = _sqrt(sqsum/n - avg*avg)
678 print('avg %g, stddev %g, min %g, max %g' % \
679 (avg, stddev, smallest, largest))
682 def _test(N=2000):
683 _test_generator(N, random, ())
684 _test_generator(N, normalvariate, (0.0, 1.0))
685 _test_generator(N, lognormvariate, (0.0, 1.0))
686 _test_generator(N, vonmisesvariate, (0.0, 1.0))
687 _test_generator(N, gammavariate, (0.01, 1.0))
688 _test_generator(N, gammavariate, (0.1, 1.0))
689 _test_generator(N, gammavariate, (0.1, 2.0))
690 _test_generator(N, gammavariate, (0.5, 1.0))
691 _test_generator(N, gammavariate, (0.9, 1.0))
692 _test_generator(N, gammavariate, (1.0, 1.0))
693 _test_generator(N, gammavariate, (2.0, 1.0))
694 _test_generator(N, gammavariate, (20.0, 1.0))
695 _test_generator(N, gammavariate, (200.0, 1.0))
696 _test_generator(N, gauss, (0.0, 1.0))
697 _test_generator(N, betavariate, (3.0, 3.0))
698 _test_generator(N, triangular, (0.0, 1.0, 1.0/3.0))
700 # Create one instance, seeded from current time, and export its methods
701 # as module-level functions. The functions share state across all uses
702 #(both in the user's code and in the Python libraries), but that's fine
703 # for most programs and is easier for the casual user than making them
704 # instantiate their own Random() instance.
706 _inst = Random()
707 seed = _inst.seed
708 random = _inst.random
709 uniform = _inst.uniform
710 triangular = _inst.triangular
711 randint = _inst.randint
712 choice = _inst.choice
713 randrange = _inst.randrange
714 sample = _inst.sample
715 shuffle = _inst.shuffle
716 normalvariate = _inst.normalvariate
717 lognormvariate = _inst.lognormvariate
718 expovariate = _inst.expovariate
719 vonmisesvariate = _inst.vonmisesvariate
720 gammavariate = _inst.gammavariate
721 gauss = _inst.gauss
722 betavariate = _inst.betavariate
723 paretovariate = _inst.paretovariate
724 weibullvariate = _inst.weibullvariate
725 getstate = _inst.getstate
726 setstate = _inst.setstate
727 getrandbits = _inst.getrandbits
729 if __name__ == '__main__':
730 _test()