Added a warning when constructing a Matrix without bracket + test modified
[sympy.git] / sympy / statistics / distributions.py
blobfcf4703606dc0016675fc783964b2bfae905ba97
1 from sympy.core import *
2 from sympy.core import sympify
3 from sympy.functions import sqrt, exp, erf
4 from sympy.printing import _StrPrinter as StrPrinter
5 import random
8 class Sample(tuple):
9 """
10 Sample([x1, x2, x3, ...]) represents a collection of samples.
11 Sample parameters like mean, variance and stddev can be accessed as
12 properties.
13 """
14 def __new__(cls, sample):
15 s = tuple.__new__(cls, sample)
16 s.mean = mean = sum(s) / Integer(len(s))
17 s.variance = sum([(x-mean)**2 for x in s]) / Integer(len(s))
18 s.stddev = sqrt(s.variance)
19 return s
21 @property
22 def median(self):
23 raise NotImplementedError
25 def __repr__(self):
26 return StrPrinter.doprint(self)
29 class ContinuousProbability(object):
30 """Base class for continuous probability distributions"""
32 def probability(s, a, b):
33 """Calculate the probability that a random number x generated
34 from the distribution satisfies a <= x <= b """
35 return s.cdf(b) - s.cdf(a)
37 def random(s, n=None):
38 """
39 random() -- generate a random number from the distribution.
40 random(n) -- generate a Sample of n random numbers.
41 """
42 if n is None:
43 return s._random()
44 else:
45 return Sample([s._random() for i in xrange(n)])
47 def __repr__(self):
48 return StrPrinter.doprint(self)
51 class Normal(ContinuousProbability):
52 """
53 Normal(mu, sigma) represents the normal or Gaussian distribution
54 with mean value mu and standard deviation sigma.
56 Example usage:
58 >>> N = Normal(1, 2)
59 >>> N.mean
61 >>> N.variance
63 >>> N.probability(-oo, 1) # probability on an interval
64 1/2
65 >>> N.probability(1, oo)
66 1/2
67 >>> N.probability(-oo, oo)
69 >>> N.probability(-1, 3)
70 erf(1/2*2**(1/2))
71 >>> _.evalf()
72 0.682689492137086
74 """
75 def __init__(self, mu, sigma):
76 self.mu = sympify(mu)
77 self.sigma = sympify(sigma)
79 mean = property(lambda s: s.mu)
80 median = property(lambda s: s.mu)
81 mode = property(lambda s: s.mu)
82 stddev = property(lambda s: s.sigma)
83 variance = property(lambda s: s.sigma**2)
85 def pdf(s, x):
86 """Return the probability density function as an expression in x"""
87 x = sympify(x)
88 return 1/(s.sigma*sqrt(2*pi)) * exp(-(x-s.mu)**2 / (2*s.sigma**2))
90 def cdf(s, x):
91 """Return the cumulative density function as an expression in x"""
92 x = sympify(x)
93 return (1+erf((x-s.mu)/(s.sigma*sqrt(2))))/2
95 def _random(s):
96 return random.gauss(float(s.mu), float(s.sigma))
98 def confidence(s, p):
99 """Return a symmetric (p*100)% confidence interval. For example,
100 p=0.95 gives a 95% confidence interval. Currently this function
101 only handles numerical values except in the trivial case p=1.
103 Examples usage:
104 # One standard deviation
105 >>> N = Normal(0, 1)
106 >>> N.confidence(0.68)
107 (-0.994457883209753, 0.994457883209753)
108 >>> N.probability(*_).evalf()
109 0.680000000000000
111 # Two standard deviations
112 >>> N = Normal(0, 1)
113 >>> N.confidence(0.95)
114 (-1.95996398454005, 1.95996398454005)
115 >>> N.probability(*_).evalf()
116 0.950000000000000
119 if p == 1:
120 return (-oo, oo)
122 assert p <= 1
124 # In terms of n*sigma, we have n = sqrt(2)*ierf(p). The inverse
125 # error function is not yet implemented in SymPy but can easily be
126 # computed numerically
128 from sympy.mpmath import mpf, secant, erf
130 p = mpf(p)
131 # calculate y = ierf(p) by solving erf(y) - p = 0
132 y = secant(lambda y: erf(y) - p, 0)
133 t = Real(str(mpf(float(s.sigma)) * mpf(2)**0.5 * y))
134 mu = s.mu.evalf()
135 return (mu-t, mu+t)
137 @staticmethod
138 def fit(sample):
139 """Create a normal distribution fit to the mean and standard
140 deviation of the given distribution or sample."""
141 if not hasattr(sample, "stddev"):
142 sample = Sample(sample)
143 return Normal(sample.mean, sample.stddev)
146 class Uniform(ContinuousProbability):
148 Uniform(a, b) represents a probability distribution with uniform
149 probability density on the interval [a, b] and zero density
150 everywhere else.
152 def __init__(self, a, b):
153 self.a = sympify(a)
154 self.b = sympify(b)
156 mean = property(lambda s: (s.a+s.b)/2)
157 median = property(lambda s: (s.a+s.b)/2)
158 mode = property(lambda s: (s.a+s.b)/2) # arbitrary
159 variance = property(lambda s: (s.b-s.a)**2 / 12)
160 stddev = property(lambda s: sqrt(s.variance))
162 def pdf(s, x):
163 """Return the probability density function as an expression in x"""
164 x = sympify(x)
165 if not x.is_Number:
166 raise NotImplementedError("SymPy does not yet support"
167 "piecewise functions")
168 if x < s.a or x > s.b:
169 return Rational(0)
170 return 1/(s.b-s.a)
172 def cdf(s, x):
173 """Return the cumulative density function as an expression in x"""
174 x = sympify(x)
175 if not x.is_Number:
176 raise NotImplementedError("SymPy does not yet support"
177 "piecewise functions")
178 if x <= s.a:
179 return Rational(0)
180 if x >= s.b:
181 return Rational(1)
182 return (x-s.a)/(s.b-s.a)
184 def _random(s):
185 return Real(random.uniform(float(s.a), float(s.b)))
187 def confidence(s, p):
188 """Generate a symmetric (p*100)% confidence interval.
190 >>> U = Uniform(1, 2)
191 >>> U.confidence(1)
192 (1, 2)
193 >>> U.confidence(Rational(1,2))
194 (5/4, 7/4)
196 p = sympify(p)
197 assert p <= 1
199 d = (s.b-s.a)*p / 2
200 return (s.mean - d, s.mean + d)
202 @staticmethod
203 def fit(sample):
204 """Create a uniform distribution fit to the mean and standard
205 deviation of the given distribution or sample."""
206 if not hasattr(sample, "stddev"):
207 sample = Sample(sample)
208 m = sample.mean
209 d = sqrt(12*sample.variance)/2
210 return Uniform(m-d, m+d)
212 class PDF(ContinuousProbability):
214 PDF(func, (x, a, b)) represents continuous probability distribution
215 with probability distribution function func(x) on interval (a, b)
217 If func is not normalized so that integrate(func, (x, a, b)) == 1,
218 it can be normalized using PDF.normalize() method
220 Example usage:
222 >>> from sympy import Symbol
223 >>> x = Symbol('x')
224 >>> a = Symbol('a', positive=True)
226 >>> exponential = PDF(exp(-x/a)/a, (x,0,oo))
227 >>> exponential.pdf(x)
228 1/a*exp(-x/a)
229 >>> exponential.cdf(x)
230 1 - exp(-x/a)
231 >>> exponential.mean
233 >>> exponential.variance
234 a**2
237 def __init__(self, pdf, var):
238 #XXX maybe add some checking of parameters
239 if isinstance(var, (tuple, list)):
240 self.pdf = Lambda(var[0], pdf)
241 self.domain = tuple(var[1:])
242 else:
243 self.pdf = Lambda(var, pdf)
244 self.domain = (-oo, oo)
245 self._cdf = None
246 self._mean = None
247 self._variance = None
248 self._stddev = None
251 def normalize(self):
253 Normalize the probability distribution function so that
254 integrate(self.pdf(x), (x, a, b)) == 1
256 Example usage:
258 >>> from sympy import Symbol
259 >>> x = Symbol('x')
260 >>> a = Symbol('a', positive=True)
262 >>> exponential = PDF(exp(-x/a), (x,0,oo))
263 >>> exponential.normalize().pdf(x)
264 1/a*exp(-x/a)
267 norm = self.probability(*self.domain)
268 if norm != 1:
269 w = Symbol('w', real=True, dummy=True)
270 return self.__class__(self.pdf(w)/norm, (w, self.domain[0], self.domain[1]))
271 #self._cdf = Lambda(w, (self.cdf(w) - self.cdf(self.domain[0]))/norm)
272 #if self._mean is not None:
273 # self._mean /= norm
274 #if self._variance is not None:
275 # self._variance = (self._variance + (self._mean*norm)**2)/norm - self.mean**2
276 #if self._stddev is not None:
277 # self._stddev = sqrt(self._variance)
278 else:
279 return self
282 def cdf(self, x):
283 x = sympify(x)
284 if self._cdf is not None:
285 return self._cdf(x)
286 else:
287 from sympy import integrate
288 w = Symbol('w', real=True, dummy=True)
289 self._cdf = integrate(self.pdf(w), w)
290 self._cdf = Lambda(w, self._cdf - self._cdf.subs(w, self.domain[0]))
291 return self._cdf(x)
293 def _get_mean(self):
294 if self._mean is not None:
295 return self._mean
296 else:
297 from sympy import integrate
298 w = Symbol('w', real=True, dummy=True)
299 self._mean = integrate(self.pdf(w)*w,(w,self.domain[0],self.domain[1]))
300 return self._mean
302 def _get_variance(self):
303 if self._variance is not None:
304 return self._variance
305 else:
306 from sympy import integrate, trim, together
307 w = Symbol('w', real=True, dummy=True)
308 self._variance = integrate(self.pdf(w)*w**2,(w,self.domain[0],self.domain[1])) - self.mean**2
309 self._variance = trim(self._variance)
310 return self._variance
312 def _get_stddev(self):
313 if self._stddev is not None:
314 return self._stddev
315 else:
316 self._stddev = sqrt(self.variance)
317 return self._stddev
319 mean = property(_get_mean)
320 variance = property(_get_variance)
321 stddev = property(_get_stddev)
324 def _random(s):
325 raise NotImplementedError
327 def transform(self,func,var):
328 """Return a probability distribution of random variable func(x)
329 currently only some simple injective functions are supported"""
331 w = Symbol('w', real=True, dummy=True)
333 from sympy import solve
334 inverse = solve(func-w, var)
335 newPdf = S.Zero
336 funcdiff = func.diff(var)
337 #TODO check if x is in domain
338 for x in inverse:
339 # this assignment holds only for x in domain
340 # in general it would require implementing
341 # piecewise defined functions in sympy
342 newPdf += (self.pdf(var)/abs(funcdiff)).subs(var,x)
344 return PDF(newPdf, (w, func.subs(var, self.domain[0]), func.subs(var, self.domain[1])))