fix integer division in Sample.mean
[sympy.git] / sympy / statistics / distributions.py
blob529a7a10f7fdbe5796de07623f40883e819f1d80
1 from sympy.core import *
2 from sympy.core import sympify
3 from sympy.functions import sqrt, exp, erf
4 import random
7 class Sample(tuple):
8 """
9 Sample([x1, x2, x3, ...]) represents a collection of samples.
10 Sample parameters like mean, variance and stddev can be accessed as
11 properties.
12 """
13 def __new__(cls, sample):
14 s = tuple.__new__(cls, sample)
15 s.mean = mean = sum(s) / Integer(len(s))
16 s.variance = sum([(x-mean)**2 for x in s]) / Integer(len(s))
17 s.stddev = sqrt(s.variance)
18 return s
20 @property
21 def median(self):
22 raise NotImplementedError
24 def __repr__(self):
25 return "Sample([" + ", ".join([str(x) for x in self]) + "])"
28 class ContinuousProbability:
29 """Base class for continuous probability distributions"""
31 def probability(s, a, b):
32 """Calculate the probability that a random number x generated
33 from the distribution satisfies a <= x <= b """
34 return s.cdf(b) - s.cdf(a)
36 def random(s, n=None):
37 """
38 random() -- generate a random number from the distribution.
39 random(n) -- generate a Sample of n random numbers.
40 """
41 if n is None:
42 return s._random()
43 else:
44 return Sample([s._random() for i in xrange(n)])
47 class Normal(ContinuousProbability):
48 """
49 Normal(mu, sigma) represents the normal or Gaussian distribution
50 with mean value mu and standard deviation sigma.
52 Example usage:
54 >>> N = Normal(1, 2)
55 >>> N.mean
57 >>> N.variance
59 >>> N.probability(-oo, 1) # probability on an interval
60 1/2
61 >>> N.probability(1, oo)
62 1/2
63 >>> N.probability(-oo, oo)
65 >>> N.probability(-1, 3)
66 erf(1/2*2**(1/2))
67 >>> _.evalf()
68 0.682689492137086
70 """
71 def __init__(self, mu, sigma):
72 self.mu = sympify(mu)
73 self.sigma = sympify(sigma)
75 def __repr__(self):
76 return "Normal(%s, %s)" % (self.mu, self.sigma)
78 mean = property(lambda s: s.mu)
79 median = property(lambda s: s.mu)
80 mode = property(lambda s: s.mu)
81 stddev = property(lambda s: s.sigma)
82 variance = property(lambda s: s.sigma**2)
84 def pdf(s, x):
85 """Return the probability density function as an expression in x"""
86 x = sympify(x)
87 return 1/(s.sigma*sqrt(2*pi)) * exp(-(x-s.mu)**2 / (2*s.sigma**2))
89 def cdf(s, x):
90 """Return the cumulative density function as an expression in x"""
91 x = sympify(x)
92 return (1+erf((x-s.mu)/(s.sigma*sqrt(2))))/2
94 def _random(s):
95 return random.gauss(float(s.mu), float(s.sigma))
97 def confidence(s, p):
98 """Return a symmetric (p*100)% confidence interval. For example,
99 p=0.95 gives a 95% confidence interval. Currently this function
100 only handles numerical values except in the trivial case p=1.
102 Examples usage:
103 # One standard deviation
104 >>> N = Normal(0, 1)
105 >>> N.confidence(0.68)
106 (-0.994457883209753, 0.994457883209753)
107 >>> N.probability(*_).evalf()
108 0.68
110 # Two standard deviations
111 >>> N = Normal(0, 1)
112 >>> N.confidence(0.95)
113 (-1.95996398454005, 1.95996398454005)
114 >>> N.probability(*_).evalf()
115 0.95
118 if p == 1:
119 return (-oo, oo)
121 assert p <= 1
123 # In terms of n*sigma, we have n = sqrt(2)*ierf(p). The inverse
124 # error function is not yet implemented in SymPy but can easily be
125 # computed numerically
127 from sympy.mpmath import mpf, secant, erf
129 p = mpf(p)
130 # calculate y = ierf(p) by solving erf(y) - p = 0
131 y = secant(lambda y: erf(y) - p, 0)
132 t = Real(str(mpf(float(s.sigma)) * mpf(2)**0.5 * y))
133 mu = s.mu.evalf()
134 return (mu-t, mu+t)
136 @staticmethod
137 def fit(sample):
138 """Create a normal distribution fit to the mean and standard
139 deviation of the given distribution or sample."""
140 if not hasattr(sample, "stddev"):
141 sample = Sample(sample)
142 return Normal(sample.mean, sample.stddev)
145 class Uniform(ContinuousProbability):
147 Uniform(a, b) represents a probability distribution with uniform
148 probability density on the interval [a, b] and zero density
149 everywhere else.
151 def __init__(self, a, b):
152 self.a = sympify(a)
153 self.b = sympify(b)
155 def __repr__(self):
156 return "Uniform(%s, %s)" % (self.a, self.b)
158 mean = property(lambda s: (s.a+s.b)/2)
159 median = property(lambda s: (s.a+s.b)/2)
160 mode = property(lambda s: (s.a+s.b)/2) # arbitrary
161 variance = property(lambda s: (s.b-s.a)**2 / 12)
162 stddev = property(lambda s: sqrt(s.variance))
164 def pdf(s, x):
165 """Return the probability density function as an expression in x"""
166 x = sympify(x)
167 if not x.is_Number:
168 raise NotImplementedError("SymPy does not yet support"
169 "piecewise functions")
170 if x < s.a or x > s.b:
171 return Rational(0)
172 return 1/(s.b-s.a)
174 def cdf(s, x):
175 """Return the cumulative density function as an expression in x"""
176 x = sympify(x)
177 if not x.is_Number:
178 raise NotImplementedError("SymPy does not yet support"
179 "piecewise functions")
180 if x <= s.a:
181 return Rational(0)
182 if x >= s.b:
183 return Rational(1)
184 return (x-s.a)/(s.b-s.a)
186 def _random(s):
187 return Real(random.uniform(float(s.a), float(s.b)))
189 def confidence(s, p):
190 """Generate a symmetric (p*100)% confidence interval.
192 >>> U = Uniform(1, 2)
193 >>> U.confidence(1)
194 (1, 2)
195 >>> U.confidence(Rational(1,2))
196 (5/4, 7/4)
198 p = sympify(p)
199 assert p <= 1
201 d = (s.b-s.a)*p / 2
202 return (s.mean - d, s.mean + d)
204 @staticmethod
205 def fit(sample):
206 """Create a uniform distribution fit to the mean and standard
207 deviation of the given distribution or sample."""
208 if not hasattr(sample, "stddev"):
209 sample = Sample(sample)
210 m = sample.mean
211 d = sqrt(12*sample.variance)/2
212 return Uniform(m-d, m+d)