point: non-implemented code describes a possible data analysis approach to implement...
[CommonLispStat.git] / src / probability / probability.lisp
blobaea59d523b085a027eec28f76e3e92f78d17a948
1 ;;; -*- mode: lisp -*-
3 ;;; Time-stamp: <2010-11-30 08:55:58 tony>
4 ;;; Creation: <2010-11-06 01:51:20 tony>
5 ;;; File: probability.lisp
6 ;;; Author: AJ Rossini <blindglobe@gmail.com>
7 ;;; Copyright: (c)2010--, AJ Rossini. Currently licensed under MIT
8 ;;; license. See file LICENSE.mit in top-level directory
9 ;;; for information.
10 ;;; Purpose: Probability functions
12 ;;; What is this talk of 'release'? Klingons do not make software
13 ;;; 'releases'. Our software 'escapes', leaving a bloody trail of
14 ;;; designers and quality assurance people in its wake.
16 ;;; This organization and structure is new to the 21st Century
17 ;;; version. Think, "21st Century Schizoid Man".
20 ;;; Current computations are handled by leveraging the cl-variates and
21 ;;; gsll packages, as they have flexibility and the capability to be
22 ;;; reproducible. This is just a stub for the interface/API that we
23 ;;; would like to be able to use.
25 ;;; A bit of theory behind this:
26 ;;; We would like to think about where probability fits into
27 ;;; statistics. For example, consider the simple case of the "mean".
28 ;;; If we treat both empirical distributions and theoretical
29 ;;; distributions as equivalent first-class objects, it leads to some
30 ;;; computationally interesting ways of doing things. This means that
31 ;;; observed data implies a first-class (discrete) empirical
32 ;;; distribution in its own right, and also implies that we need to
33 ;;; include a means of computing both functionals of 1 distribution as
34 ;;; well as functionals of multiple distributions (think of the
35 ;;; kullback-leibler distance).
36 ;;;
37 ;;; This file should describe the primary generic functions and data
38 ;;; structures that imply what is feasible in this case.
39 ;;;
40 ;;; We will map empirical and theoretical distributions into this
41 ;;; structure in different files, as well as common (and uncommon)
42 ;;; functionals.
45 (in-package :cls-probability)
;;; DENSITY: interface stub.  The three methods below are placeholders
;;; with no computation; each one simply returns NIL.  They only fix
;;; which kinds of VALUE the generic is expected to accept.
;;; NOTE(review): the PROBABILITY-LAW class is defined further down in
;;; this file and VECTOR-LIKE is not defined in this file at all --
;;; confirm both are available before these methods are loaded.
(defgeneric density (probability-law-instance value)
  (:documentation "Generic function of a probability law instance and a VALUE.
Presumably the density (or mass) of the law at VALUE -- TODO confirm
once a real method exists; the current methods all return NIL.")
  ;; VALUE is a single real number.
  (:method ((pli probability-law) (value real))
    nil)
  ;; VALUE is a list.
  (:method ((pli probability-law) (value list))
    nil)
  ;; VALUE is a VECTOR-LIKE object.
  (:method ((pli probability-law) (value vector-like))
    nil))
;;; Interface stubs: only the generic functions are declared here; no
;;; methods for them are defined in this part of the file.  All of
;;; them take the probability law as their first argument, named
;;; PROBABILITY-LAW-INSTANCE throughout for consistency.

(defgeneric distribution (probability-law-instance value)
  (:documentation "Generic function of a probability law instance and a VALUE.
Presumably the cumulative distribution function of the law evaluated
at VALUE -- TODO confirm."))

(defgeneric quantile (probability-law-instance value)
  (:documentation "Generic function of a probability law instance and a VALUE.
Presumably the quantile of the law at probability VALUE -- TODO confirm."))

(defgeneric interquartile-range (probability-law-instance)
  (:documentation "Generic function of a single probability law instance.
Presumably the interquartile range of the law -- TODO confirm."))

(defgeneric draw-variates (probability-law-instance n)
  (:documentation "Generic function of a probability law instance and N.
Presumably draws N variates from the law -- TODO confirm."))
;;; Marker class with no superclasses and no slots of its own.
;;; DEFCLASS requires an explicit (possibly empty) slot list, and the
;;; documentation string must be given as a (:documentation ...) class
;;; option rather than as a bare keyword/value pair.
(defclass probability-parameters ()
  ()
  (:documentation "Virtual class to denote prob parameters"))
66 ;; TODO: need to understand how to manage multiple parameterizations
67 ;; for the same family of probability laws.
;;; PROBABILITY-LAW: container class for one probability law.
;;;
;;; The slots declare no initargs, initforms, readers or accessors, so
;;; a fresh instance has every slot unbound and the slots are reachable
;;; only through SLOT-VALUE.
;;; NOTE(review): the slot types PROB-FUNCTION and UNIF-STREAM are not
;;; defined anywhere in this file -- confirm they are provided
;;; elsewhere before relying on the :type declarations.
(defclass probability-law ()
  ((density-function
    :type prob-function
    ;; slot option is spelled :documentation (was misspelled before)
    :documentation "density function, if exists")
   (mass-function
    :type prob-function
    :documentation "function")
   (support-function
    :documentation "List of values for discrete mass functions, list of pairs denoting for ranges")
   (support-class
    :type symbol
    :documentation "'REAL, 'DISCRETE'")
   (parameters
    :type list)
   (prng-stream
    :type unif-stream
    :documentation "current underlying prng stream object, typically
Uniform[0,1], but could be Uniform{0,...,n} for some discrete pRNGs."))
  ;; The only class option; the stray empty list that used to follow it
  ;; was not a valid class option and has been removed.
  (:documentation "Sufficient data to compute probabilistic quantities
and draw from the particularly specified probability law. Given the
support of the probability law, and a function mapping the law to
the prob result, we can compute, in an expensive manner, most
quantities. When feasible, we can accelerate this quite a bit."))
97 ;; We basically want to support the following style of construct:
99 (let ((my-abstract-law (make 'probability-law
100 :density/mass (gaussian-law
101 :parameters '(:mean 5 :variance 3))
102 :seed 1324
103 :name "Gaussian(5,3)"
104 :documentation "model-distribution, used
105 for likelihoods, probabilities of
106 asympts, and other somethings."))
107 (my-empirical-law (make 'probability-law
108 :density/mass (empirical-law data-vector-or-data-list)
109 :seed 415
110 :name "Empirical Law from observations"
111 :documentation "based on observations,
112 bootstrap/resampling style
113 probability.")))
114 (mean my-law)
115 (variance my-law)
116 (standard-deviation my-law)
117 (draw-variates my-law 10)
118 ;; one of the following would return a number, the other would return a 'nil
119 (probability-density-function my-law x)
120 (probability-mass-function my-law x)
121 (cumulative-distribution-function my-law x)
122 (survivorship-function my-law x)
123 (hazard-function my-law x)
124 (cumulative-hazard-function my-law x)
127 (mean my-empirical-law) ; empirical mean
128 (draw-variates my-empirical-law 10) ; bootstrap (unweighted)
129 ;; the rest would consist of empirical