2 ;;; Copyright (c) 2005--2009, by A.J. Rossini <blindglobe@gmail.com>
3 ;;; See COPYRIGHT file for any additional restrictions (BSD license).
4 ;;; Since 1991, ANSI was finally finished. Edited for ANSI Common Lisp.
6 ;;; XLisp-ism's removed to focus on Common Lisp. Original source from:
7 ;;;; statistics.lsp XLISP-STAT statistics functions
8 ;;;; XLISP-STAT 2.1 Copyright (c) 1990, by Luke Tierney
9 ;;;; Additions to Xlisp 2.1, Copyright (c) 1989 by David Michael Betz
10 ;;;; You may give out copies of this software; for conditions see the file
11 ;;;; COPYING included with this distribution.
13 (in-package :lisp-stat-descriptive-statistics
)
16 ;;; Basic Summary Statistics
20 (:documentation
"compute the mean of lists, vectors, various objects")
24 (:method
((x vector-like
))
25 ;; (defparameter *x* (make-vector 5 :initial-contents '((1d0 2d0 3d0 4d0 5d0))))
26 (/ (loop for i from
0 to
(- (nelts x
) 1)
29 #|
;; a more traditional versino of the above...
30 (:method
((x vector-like
))
32 (type (if (col-vector-p x
) :column
:row
)))
34 (ecase type
(:row
1) (:column n
))
35 (ecase type
(:row n
) (:column
1))))
42 Returns the mean of the elements x. Vector reducing.
44 FIXME: Why is this so complex? When figure out, put into generic."
47 (labels ((add-to-mean (x)
48 (let ((count+1 (+ count
1.0)))
49 (setf mean
(+ (* (/ count count
+1) mean
) (* (/ count
+1) x
)))
50 (setf count count
+1)))
54 (let ((seq (compound-data-seq x
)))
57 (if (numberp x
) (add-to-mean x
) (find-mean x
)))
58 (let ((n (length seq
)))
61 (let ((x (aref seq i
)))
64 (find-mean x
))))))))))
69 ;; We do the variance, since the STANDARD-DEVIATION is simply the root
70 ;; (for interesting definitions of "simply"), and VARIANCE is a bit
72 (defgeneric variance
(x)
73 (:documentation
"compute the variance of the entity X, i.e. if a
74 scalar, vector, or (covariance) matrix.")
75 (:method
((x sequence
))
78 (sqrt (* (mean (* r r
)) (/ n
(- n
1))))))
79 (:method
((x vector-like
))
81 (let* ((negone nil
) ;; FIXME!
82 (negresid (axpy (mean x
) negone x
))) ; -mu + x = x - mu
83 (/ (loop for i from
0 to
(- (nelts x
) 1)
84 summing
(* (vref negresid i
)
86 (- (nelts negresid
) 1))))
87 (:method
((x matrix-like
))
88 (error "FIXME: implement variance for matrices (i.e. var-covar on variables, or?).")))
90 (defun covariance-matrix (&rest args
)
92 Returns the sample covariance matrix of the data columns in ARGS. ARGS may
93 consist of lists, vectors or matrices."
94 (let ((columns (apply #'append
96 (if (typep x
'matrix-like
)
100 (/ (cross-product (reduce #'bind2
101 (- columns
(mapcar #'mean columns
))))
102 (- (length (car columns
)) 1))))
104 (defgeneric standard-deviation
(x)
105 (:documentation
"Compute standard deivation of entity X.")
106 (:method
((x vector-like
)) (sqrt (variance x
)))
108 ;; if elements are not numeric, error, otherwise
110 (:method
((x matrix-like
))
111 (error "FIXME: implement SD for (square) matrix-like objects")))
113 (defun standard-deviation-old (x)
115 Returns the standard deviation of the elements x. Vector reducing.
117 FIXME AJR: This should be redone as square-root of the variance, and
118 defer to that structure for computation. The justification for this
119 'redo' is that it holds for many generalized variance structures such
120 as variance-covariance matrices."
121 (let ((n (count-elements x
))
123 (sqrt (* (mean (* r r
)) (/ n
(- n
1))))))
125 (defgeneric quantile
(x p
)
126 (:documentation
"Returns the P-th quantile(s) of sequence X as a
127 native lisp list (which could be fed into a vector-like or similar
128 for the final format).
130 NOT OPTIMIZED. This approach uses the native Common Lisp sort,
131 which is destructive. We can optimize space usage by using some
132 form of rank computations instead.")
133 (:method
((x sequence
) (p real
))
134 (let ((np (* p
(- (length x
) 1)))
136 (mean (list (aref x-copy
(floor np
))
137 (aref x-copy
(ceiling np
)))))) ;; aref work in general for lists too, or use nth?
138 (:method
((x vector-like
) (p real
)) ;; average of sorted elements. Could store compile
139 (let ((np (* p
(- (nelts x
) 1)))
141 (mean (list (vref (sort x-copy
#'<) (floor np
))
142 (vref (sort x-copy
#'<) (ceiling np
))))))
143 (:method
((x sequence
) (p sequence
))
144 (error "FIXME: generalize. Basically, do mapcar or similar across a vector."))
145 (:method
((x vector-like
) (p sequence
))
146 (error "FIXME: generalize."))
147 (:method
((x vector-like
) (p vector-like
))
148 (error "FIXME: generalize.")))
150 ;;; things to build on top of quantile.
152 ;; Do as generics if possible, keeping in mind that quantile returns a
153 ;; raw lisp list. Nearly all of these use native lisp structures for speed,
154 ;; such as raw lists for single and vector values, and arrays
155 ;; (simple?) for matrix-values and array-valued results.
158 "Return median of X, using whatever the quantile generic function supports."
161 (defun interquartile-range (x)
162 "Returns the interquartile range of the elements of X."
163 (reduce #'-
(quantile x
(list 0.75 0.25))))
166 "Returns the five number summary (min, 1st quartile, medinan, 3rd quartile,
167 max) of the elements X."
168 (quantile x
(list 0 .25 .5 .75 1)))
170 ;;;; Sampling / Resampling
172 (defgeneric sample
(x n
&optional replace weights
)
173 (:documentation
"Draw a sample of size n from sequence x, with/out
174 replacement, with weights per element of x as described as a
175 probability mass distribution vector. Length of weights should
176 match length of X. Unclear if (= X (uniq X))? (possible issue).
177 Return value shoud match the input values.")
178 (:method
((x sequence
) (n fixnum
)
179 &optional replace weights
)
180 (declare (ignorable weights
))
181 (sample-fn x n replace weights
))
182 (:method
((x vector-like
) (n fixnum
)
183 &optional replace weights
)
184 (declare (ignorable weights replace
))
185 (error "SAMPLE for vector-likes not implemented yet.")))
187 (defun sample-fn (x ssize
&optional replace weights
)
188 "Args: (x n &optional (replace nil))
189 Returns a list of a random sample of size N from sequence X drawn with or
192 IMPROVE: need to implement sampling with weights."
193 (declare (ignorable weights
)) ;; need to implement
196 (x (if (consp x
) (coerce x
'vector
) (copy-seq x
)))
199 (dotimes (i ssize result
)
200 (let ((j (if replace
(random n
) (+ i
(random (- n i
))))))
201 (setf result
(cons (aref x j
) result
))
202 (unless replace
;; swap elements i and j
203 (let ((temp (aref x i
)))
204 (setf (aref x i
) (aref x j
))
205 (setf (aref x j
) temp
))))))))