reduce/apply/values corrections
[CommonLispStat.git] / statistics.lsp
blob1e58dd30f6647a88311f32cd541774ee20afd56d
1 ;;; -*- mode: lisp -*-
2 ;;; Copyright (c) 2005--2007, by A.J. Rossini <blindglobe@gmail.com>
3 ;;; See COPYRIGHT file for any additional restrictions (BSD license).
4 ;;; Since 1991, ANSI was finally finished. Edited for ANSI Common Lisp.
6 ;;; XLisp-ism's removed to focus on Common Lisp. Original source from:
7 ;;;; statistics.lsp XLISP-STAT statistics functions
8 ;;;; XLISP-STAT 2.1 Copyright (c) 1990, by Luke Tierney
9 ;;;; Additions to Xlisp 2.1, Copyright (c) 1989 by David Michael Betz
10 ;;;; You may give out copies of this software; for conditions see the file
11 ;;;; COPYING included with this distribution.
13 (in-package :cl-user)
15 (defpackage :lisp-stat-descriptive-statistics
16 (:use :common-lisp
17 :lisp-stat-data
18 :lisp-stat-math)
19 (:shadowing-import-from :lisp-stat-math
20 expt + - * / ** mod rem abs 1+ 1- log exp sqrt sin cos tan
21 asin acos atan sinh cosh tanh asinh acosh atanh float random
22 truncate floor ceiling round minusp zerop plusp evenp oddp
23 < <= = /= >= > complex conjugate realpart imagpart phase
24 min max logand logior logxor lognot ffloor fceiling
25 ftruncate fround signum cis)
26 (:export ;; descriptive stats
27 standard-deviation quantile median interquartile-range
28 fivnum
30 sample
32 ;; the following are more matrix-centric
33 covariance-matrix matrix print-matrix solve
34 backsolve eigenvalues eigenvectors accumulate cumsum combine
35 lowess))
37 (in-package :lisp-stat-descriptive-statistics)
39 ;;;;
40 ;;;; Basic Summary Statistics
41 ;;;;
43 (defun standard-deviation (x)
44 "Args: (x)
45 Returns the standard deviation of the elements x. Vector reducing."
46 (let ((n (count-elements x))
47 (r (- x (mean x))))
48 (sqrt (* (mean (* r r)) (/ n (- n 1))))))
51 ;; FIXME the following assume that we are using the vector based functions
52 (defun quantile (x p)
53 "Args: (x p)
54 Returns the P-th quantile(s) of sequence X. P can be a number or a sequence."
55 (let* ((x (sort-data x))
56 (n (length x))
57 (np (* p (- n 1)))
58 (low (floor np))
59 (high (ceiling np)))
60 (/ (+ (select x low) (select x high)) 2)))
62 (defun median (x)
63 "Args: (x)
64 Returns the median of the elements of X."
65 (quantile x 0.5))
67 (defun interquartile-range (x)
68 "Args: (number-data)
69 Returns the interquartile range of the elements of X."
70 (reduce #'- (quantile x '(0.75 0.25))))
72 (defun fivnum (x)
73 "Args: (number-data)
74 Returns the five number summary (min, 1st quartile, medinan, 3rd quartile,
75 max) of the elements X."
76 (quantile x '(0 .25 .5 .75 1)))
78 (defun covariance-matrix (&rest args)
79 "Args: (&rest args)
80 Returns the sample covariance matrix of the data columns in ARGS. ARGS may
81 consist of lists, vectors or matrices."
82 (let ((columns (apply #'append
83 (mapcar #'(lambda (x)
84 (if (matrixp x) (column-list x) (list x)))
85 args))))
86 (/ (cross-product (apply #'bind-columns
87 (- columns (mapcar #'mean columns))))
88 (- (length (car columns)) 1))))
90 ;;;; Sampling / Resampling
92 (defun sample (x ssize &optional replace)
93 "Args: (x n &optional (replace nil))
94 Returns a list of a random sample of size N from sequence X drawn with or
95 without replacement."
96 (check-sequence x)
97 (let ((n (length x))
98 (x (if (consp x) (coerce x 'vector) (copy-vector x)))
99 (result nil))
100 (if (< 0 n)
101 (dotimes (i ssize result)
102 (let ((j (if replace (random n) (+ i (random (- n i))))))
103 (setf result (cons (aref x j) result))
104 (unless replace ;; swap elements i and j
105 (let ((temp (aref x i)))
106 (setf (aref x i) (aref x j))
107 (setf (aref x j) temp))))))))
111 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
112 ;;;;
113 ;;;; Sorting Functions
114 ;;;;
115 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
117 (defun sort-data (x)
118 "Args: (sequence)
119 Returns a sequence with the numbers or strings in the sequence X in order."
120 (flet ((less (x y) (if (numberp x) (< x y) (string-lessp x y))))
121 (stable-sort (copy-seq (compound-data-seq x)) #'less)))
123 (defun order (x)
124 "Args (x)
125 Returns a sequence of the indices of elements in the sequence of numbers
126 or strings X in order."
127 (let* ((seq (compound-data-seq x))
128 (type (if (consp seq) 'list 'vector))
129 (i -1))
130 (flet ((entry (x) (setf i (+ i 1)) (list x i))
131 (less (a b)
132 (let ((x (first a))
133 (y (first b)))
134 (if (numberp x) (< x y) (string-lessp x y)))))
135 (let ((sorted-seq (stable-sort (map type #'entry seq) #'less)))
136 (map type #'second sorted-seq)))))
138 ;; this isn't destructive -- do we document destructive only, or any
139 ;; variant?
140 (defun rank (x)
141 "Args (x)
142 Returns a sequence with the elements of the list or array of numbers or
143 strings X replaced by their ranks."
144 (let ((ranked-seq (order (order x))))
145 (make-compound-data (compound-data-shape x) ranked-seq)))
147 (defun mean (x)
148 "Args: (x)
149 Returns the mean of the elements x. Vector reducing."
150 (let ((mean 0.0)
151 (count 0.0))
152 (labels ((add-to-mean (x)
153 (let ((count+1 (+ count 1.0)))
154 (setf mean (+ (* (/ count count+1) mean) (* (/ count+1) x)))
155 (setf count count+1)))
156 (find-mean (x)
157 (if (numberp x)
158 (add-to-mean x)
159 (let ((seq (compound-data-seq x)))
160 (if (consp seq)
161 (dolist (x seq)
162 (if (numberp x) (add-to-mean x) (find-mean x)))
163 (let ((n (length seq)))
164 (dotimes (i n)
165 (declare (fixnum i))
166 (let ((x (aref seq i)))
167 (if (numberp x)
168 (add-to-mean x)
169 (find-mean x))))))))))
170 (find-mean x)
171 mean)))
173 ;;;;
174 ;;;; Linear Algebra Functions
175 ;;;;
177 (defun matrix (dim data)
178 "Args: (dim data)
179 returns a matrix of dimensions DIM initialized using sequence DATA
180 in row major order."
181 (let ((dim (coerce dim 'list))
182 (data (coerce data 'list)))
183 (make-array dim :initial-contents (split-list data (nth 1 dim)))))
185 (defun print-matrix (a &optional (stream *standard-output*))
186 "Args: (matrix &optional stream)
187 Prints MATRIX to STREAM in a nice form that is still machine readable"
188 (unless (matrixp a) (error "not a matrix - ~a" a))
189 (let ((size (min 15 (max (map-elements #'flatsize a)))))
190 (format stream "#2a(~%")
191 (dolist (x (row-list a))
192 (format stream " (")
193 (let ((n (length x)))
194 (dotimes (i n)
195 (let ((y (aref x i)))
196 (cond
197 ((integerp y) (format stream "~vd" size y))
198 ((floatp y) (format stream "~vg" size y))
199 (t (format stream "~va" size y))))
200 (if (< i (- n 1)) (format stream " "))))
201 (format stream ")~%"))
202 (format stream " )~%")
203 nil))
205 (defun solve (a b)
206 "Args: (a b)
207 Solves A x = B using LU decomposition and backsolving. B can be a sequence
208 or a matrix."
209 (let ((lu (lu-decomp a)))
210 (if (matrixp b)
211 (apply #'bind-columns
212 (mapcar #'(lambda (x) (lu-solve lu x)) (column-list b)))
213 (lu-solve lu b))))
215 (defun backsolve (a b)
216 "Args: (a b)
217 Solves A x = B by backsolving, assuming A is upper triangular. B must be a
218 sequence. For use with qr-decomp."
219 (let* ((n (length b))
220 (sol (make-array n)))
221 (dotimes (i n)
222 (let* ((k (- n i 1))
223 (val (elt b k)))
224 (dotimes (j i)
225 (let ((l (- n j 1)))
226 (setq val (- val (* (aref sol l) (aref a k l))))))
227 (setf (aref sol k) (/ val (aref a k k)))))
228 (if (listp b) (coerce sol 'list) sol)))
230 (defun eigenvalues (a)
231 "Args: (a)
232 Returns list of eigenvalues of square, symmetric matrix A"
233 (first (eigen a)))
235 (defun eigenvectors (a)
236 "Args: (a)
237 Returns list of eigenvectors of square, symmetric matrix A"
238 (second (eigen a)))
240 (defun accumulate (f s)
241 "Args: (f s)
242 Accumulates elements of sequence S using binary function F.
243 (accumulate #'+ x) returns the cumulative sum of x."
244 (let* ((result (list (elt s 0)))
245 (tail result))
246 (flet ((acc (dummy x)
247 (rplacd tail (list (funcall f (first tail) x)))
248 (setf tail (cdr tail))))
249 (reduce #'acc s))
250 (if (vectorp s) (coerce result 'vector) result)))
252 (defun cumsum (x)
253 "Args: (x)
254 Returns the cumulative sum of X."
255 (accumulate #'+ x))
257 (defun combine (&rest args)
258 "Args (&rest args)
259 Returns sequence of elements of all arguments."
260 (copy-seq (element-seq args)))
262 (defun lowess (x y &key (f .25) (steps 2) (delta -1) sorted)
263 "Args: (x y &key (f .25) (steps 2) delta sorted)
264 Returns (list X YS) with YS the LOWESS fit. F is the fraction of data used for
265 each point, STEPS is the number of robust iterations. Fits for points within
266 DELTA of each other are interpolated linearly. If the X values setting SORTED
267 to T speeds up the computation."
268 (let ((x (if sorted x (sort-data x)))
269 (y (if sorted y (select y (order x))))
270 (delta (if (> delta 0.0) delta (/ (- (max x) (min x)) 50))))
271 (list x)));; (|base-lowess| x y f steps delta))))