set up to use quicklisp to load cairo2d, will need to do the same for cl-2d
[CommonLispStat.git] / src / describe / statistics.lsp
blob85383eb86175bf98d4949e10544ddd44015bc60a
1 ;;; -*- mode: lisp -*-
2 ;;; Copyright (c) 2005--2009, by A.J. Rossini <blindglobe@gmail.com>
3 ;;; See COPYRIGHT file for any additional restrictions (BSD license).
4 ;;; Since 1991, ANSI was finally finished. Edited for ANSI Common Lisp.
6 ;;; XLisp-ism's removed to focus on Common Lisp. Original source from:
7 ;;;; statistics.lsp XLISP-STAT statistics functions
8 ;;;; XLISP-STAT 2.1 Copyright (c) 1990, by Luke Tierney
9 ;;;; Additions to Xlisp 2.1, Copyright (c) 1989 by David Michael Betz
10 ;;;; You may give out copies of this software; for conditions see the file
11 ;;;; COPYING included with this distribution.
13 (in-package :lisp-stat-descriptive-statistics)
15 ;;;
16 ;;; Basic Summary Statistics
17 ;;;
19 (defgeneric mean (x)
20 (:documentation "compute the mean of lists, vectors, various objects")
21 (:method ((x list))
22 (/ (reduce #'+ x)
23 (length x)))
24 (:method ((x vector-like))
25 ;; (defparameter *x* (make-vector 5 :initial-contents '((1d0 2d0 3d0 4d0 5d0))))
26 (/ (loop for i from 0 to (- (nelts x) 1)
27 summing (vref x i))
28 (nelts x)))
29 #| ;; a more traditional versino of the above...
30 (:method ((x vector-like))
31 (let ((n (nelts x))
32 (type (if (col-vector-p x) :column :row)))
33 (/ (gemm x (ones
34 (ecase type (:row 1) (:column n))
35 (ecase type (:row n) (:column 1))))
36 n)))
40 (defun mean-fn (x)
41 "Args: (x)
42 Returns the mean of the elements x. Vector reducing.
44 FIXME: Why is this so complex? When figure out, put into generic."
45 (let ((mean 0.0)
46 (count 0.0))
47 (labels ((add-to-mean (x)
48 (let ((count+1 (+ count 1.0)))
49 (setf mean (+ (* (/ count count+1) mean) (* (/ count+1) x)))
50 (setf count count+1)))
51 (find-mean (x)
52 (if (numberp x)
53 (add-to-mean x)
54 (let ((seq (compound-data-seq x)))
55 (if (consp seq)
56 (dolist (x seq)
57 (if (numberp x) (add-to-mean x) (find-mean x)))
58 (let ((n (length seq)))
59 (dotimes (i n)
60 (declare (fixnum i))
61 (let ((x (aref seq i)))
62 (if (numberp x)
63 (add-to-mean x)
64 (find-mean x))))))))))
65 (find-mean x)
66 mean)))
69 ;; We do the variance, since the STANDARD-DEVIATION is simply the root
70 ;; (for interesting definitions of "simply"), and VARIANCE is a bit
71 ;; more general.
72 (defgeneric variance (x)
73 (:documentation "compute the variance of the entity X, i.e. if a
74 scalar, vector, or (covariance) matrix.")
75 (:method ((x sequence))
76 (let ((n (length x))
77 (r (- x (mean x))))
78 (sqrt (* (mean (* r r)) (/ n (- n 1))))))
79 (:method ((x vector-like))
80 ;; FIXME!!!
81 (let* ((negone nil) ;; FIXME!
82 (negresid (axpy (mean x) negone x))) ; -mu + x = x - mu
83 (/ (loop for i from 0 to (- (nelts x) 1)
84 summing (* (vref negresid i)
85 (vref negresid i)))
86 (- (nelts negresid) 1))))
87 (:method ((x matrix-like))
88 (error "FIXME: implement variance for matrices (i.e. var-covar on variables, or?).")))
90 (defun covariance-matrix (&rest args)
91 "Args: (&rest args)
92 Returns the sample covariance matrix of the data columns in ARGS. ARGS may
93 consist of lists, vectors or matrices."
94 (let ((columns (apply #'append
95 (mapcar #'(lambda (x)
96 (if (typep x 'matrix-like)
97 (list-of-columns x)
98 (list x)))
99 args))))
100 (/ (cross-product (reduce #'bind2
101 (- columns (mapcar #'mean columns))))
102 (- (length (car columns)) 1))))
104 (defgeneric standard-deviation (x)
105 (:documentation "Compute standard deivation of entity X.")
106 (:method ((x vector-like)) (sqrt (variance x)))
107 (:method ((x list))
108 ;; if elements are not numeric, error, otherwise
109 (sqrt (variance x)))
110 (:method ((x matrix-like))
111 (error "FIXME: implement SD for (square) matrix-like objects")))
113 (defun standard-deviation-old (x)
114 "Args: (x)
115 Returns the standard deviation of the elements x. Vector reducing.
117 FIXME AJR: This should be redone as square-root of the variance, and
118 defer to that structure for computation. The justification for this
119 'redo' is that it holds for many generalized variance structures such
120 as variance-covariance matrices."
121 (let ((n (count-elements x))
122 (r (- x (mean x))))
123 (sqrt (* (mean (* r r)) (/ n (- n 1))))))
125 (defgeneric quantile (x p)
126 (:documentation "Returns the P-th quantile(s) of sequence X as a
127 native lisp list (which could be fed into a vector-like or similar
128 for the final format).
130 NOT OPTIMIZED. This approach uses the native Common Lisp sort,
131 which is destructive. We can optimize space usage by using some
132 form of rank computations instead.")
133 (:method ((x sequence) (p real))
134 (let ((np (* p (- (length x) 1)))
135 (x-copy x))
136 (mean (list (aref x-copy (floor np))
137 (aref x-copy (ceiling np)))))) ;; aref work in general for lists too, or use nth?
138 (:method ((x vector-like) (p real)) ;; average of sorted elements. Could store compile
139 (let ((np (* p (- (nelts x) 1)))
140 (x-copy x))
141 (mean (list (vref (sort x-copy #'<) (floor np))
142 (vref (sort x-copy #'<) (ceiling np))))))
143 (:method ((x sequence) (p sequence))
144 (error "FIXME: generalize. Basically, do mapcar or similar across a vector."))
145 (:method ((x vector-like) (p sequence))
146 (error "FIXME: generalize."))
147 (:method ((x vector-like) (p vector-like))
148 (error "FIXME: generalize.")))
150 ;;; things to build on top of quantile.
152 ;; Do as generics if possible, keeping in mind that quantile returns a
153 ;; raw lisp list. Nearly all of these use native lisp structures for speed,
154 ;; such as raw lists for single and vector values, and arrays
155 ;; (simple?) for matrix-values and array-valued results.
157 (defun median (x)
158 "Return median of X, using whatever the quantile generic function supports."
159 (quantile x 0.5))
161 (defun interquartile-range (x)
162 "Returns the interquartile range of the elements of X."
163 (reduce #'- (quantile x (list 0.75 0.25))))
165 (defun fivnum (x)
166 "Returns the five number summary (min, 1st quartile, medinan, 3rd quartile,
167 max) of the elements X."
168 (quantile x (list 0 .25 .5 .75 1)))
170 ;;;; Sampling / Resampling
172 (defgeneric sample (x n &optional replace weights)
173 (:documentation "Draw a sample of size n from sequence x, with/out
174 replacement, with weights per element of x as described as a
175 probability mass distribution vector. Length of weights should
176 match length of X. Unclear if (= X (uniq X))? (possible issue).
177 Return value shoud match the input values.")
178 (:method ((x sequence) (n fixnum)
179 &optional replace weights)
180 (declare (ignorable weights))
181 (sample-fn x n replace weights))
182 (:method ((x vector-like) (n fixnum)
183 &optional replace weights)
184 (declare (ignorable weights replace))
185 (error "SAMPLE for vector-likes not implemented yet.")))
187 (defun sample-fn (x ssize &optional replace weights)
188 "Args: (x n &optional (replace nil))
189 Returns a list of a random sample of size N from sequence X drawn with or
190 without replacement.
192 IMPROVE: need to implement sampling with weights."
193 (declare (ignorable weights)) ;; need to implement
194 (check-sequence x)
195 (let ((n (length x))
196 (x (if (consp x) (coerce x 'vector) (copy-seq x)))
197 (result nil))
198 (if (< 0 n)
199 (dotimes (i ssize result)
200 (let ((j (if replace (random n) (+ i (random (- n i))))))
201 (setf result (cons (aref x j) result))
202 (unless replace ;; swap elements i and j
203 (let ((temp (aref x i)))
204 (setf (aref x i) (aref x j))
205 (setf (aref x j) temp))))))))