ignore fontlock droppings.
[CommonLispStat.git] / statistics.lsp
blob5479719e2e0f9a983d334f3a364e986523da8166
1 ;;; -*- mode: lisp -*-
2 ;;; Copyright (c) 2005--2007, by A.J. Rossini <blindglobe@gmail.com>
3 ;;; See COPYRIGHT file for any additional restrictions (BSD license).
4 ;;; Since 1991, ANSI was finally finished. Edited for ANSI Common Lisp.
6 ;;; XLisp-ism's removed to focus on Common Lisp. Original source from:
7 ;;;; statistics.lsp XLISP-STAT statistics functions
8 ;;;; XLISP-STAT 2.1 Copyright (c) 1990, by Luke Tierney
9 ;;;; Additions to Xlisp 2.1, Copyright (c) 1989 by David Michael Betz
10 ;;;; You may give out copies of this software; for conditions see the file
11 ;;;; COPYING included with this distribution.
13 (in-package :cl-user)
15 (defpackage :lisp-stat-descriptive-statistics
16 (:use :common-lisp
17 :lisp-stat-data
18 :lisp-stat-math
19 :lisp-stat-compound-data
20 :lisp-stat-sequence
21 :lisp-stat-matrix
22 :lisp-stat-linalg-data
23 :lisp-stat-linalg
25 :lisp-stat-basics
27 (:shadowing-import-from :lisp-stat-math
28 expt + - * / ** mod rem abs 1+ 1- log exp sqrt sin cos tan
29 asin acos atan sinh cosh tanh asinh acosh atanh float random
30 truncate floor ceiling round minusp zerop plusp evenp oddp
31 < <= = /= >= > complex conjugate realpart imagpart phase
32 min max logand logior logxor lognot ffloor fceiling
33 ftruncate fround signum cis)
34 (:export ;; descriptive stats
35 standard-deviation quantile median interquartile-range
36 fivnum
38 sample
40 ;; the following are more matrix-centric
41 covariance-matrix matrix print-matrix solve
42 backsolve eigenvalues eigenvectors accumulate cumsum combine
43 lowess))
45 (in-package :lisp-stat-descriptive-statistics)
47 ;;;;
48 ;;;; Basic Summary Statistics
49 ;;;;
51 (defun standard-deviation (x)
52 "Args: (x)
53 Returns the standard deviation of the elements x. Vector reducing."
54 (let ((n (count-elements x))
55 (r (- x (mean x))))
56 (sqrt (* (mean (* r r)) (/ n (- n 1))))))
59 ;; FIXME the following assume that we are using the vector based functions
60 (defun quantile (x p)
61 "Args: (x p)
62 Returns the P-th quantile(s) of sequence X. P can be a number or a sequence."
63 (let* ((x (sort-data x))
64 (n (length x))
65 (np (* p (- n 1)))
66 (low (floor np))
67 (high (ceiling np)))
68 (/ (+ (select x low) (select x high)) 2)))
70 (defun median (x)
71 "Args: (x)
72 Returns the median of the elements of X."
73 (quantile x 0.5))
75 (defun interquartile-range (x)
76 "Args: (number-data)
77 Returns the interquartile range of the elements of X."
78 (reduce #'- (quantile x '(0.75 0.25))))
80 (defun fivnum (x)
81 "Args: (number-data)
82 Returns the five number summary (min, 1st quartile, medinan, 3rd quartile,
83 max) of the elements X."
84 (quantile x '(0 .25 .5 .75 1)))
86 (defun covariance-matrix (&rest args)
87 "Args: (&rest args)
88 Returns the sample covariance matrix of the data columns in ARGS. ARGS may
89 consist of lists, vectors or matrices."
90 (let ((columns (apply #'append
91 (mapcar #'(lambda (x)
92 (if (matrixp x) (column-list x) (list x)))
93 args))))
94 (/ (cross-product (apply #'bind-columns
95 (- columns (mapcar #'mean columns))))
96 (- (length (car columns)) 1))))
98 ;;;; Sampling / Resampling
100 (defun sample (x ssize &optional replace)
101 "Args: (x n &optional (replace nil))
102 Returns a list of a random sample of size N from sequence X drawn with or
103 without replacement."
104 (check-sequence x)
105 (let ((n (length x))
106 (x (if (consp x) (coerce x 'vector) (copy-vector x)))
107 (result nil))
108 (if (< 0 n)
109 (dotimes (i ssize result)
110 (let ((j (if replace (random n) (+ i (random (- n i))))))
111 (setf result (cons (aref x j) result))
112 (unless replace ;; swap elements i and j
113 (let ((temp (aref x i)))
114 (setf (aref x i) (aref x j))
115 (setf (aref x j) temp))))))))
119 (defun mean (x)
120 "Args: (x)
121 Returns the mean of the elements x. Vector reducing."
122 (let ((mean 0.0)
123 (count 0.0))
124 (labels ((add-to-mean (x)
125 (let ((count+1 (+ count 1.0)))
126 (setf mean (+ (* (/ count count+1) mean) (* (/ count+1) x)))
127 (setf count count+1)))
128 (find-mean (x)
129 (if (numberp x)
130 (add-to-mean x)
131 (let ((seq (compound-data-seq x)))
132 (if (consp seq)
133 (dolist (x seq)
134 (if (numberp x) (add-to-mean x) (find-mean x)))
135 (let ((n (length seq)))
136 (dotimes (i n)
137 (declare (fixnum i))
138 (let ((x (aref seq i)))
139 (if (numberp x)
140 (add-to-mean x)
141 (find-mean x))))))))))
142 (find-mean x)
143 mean)))
145 ;;;;
146 ;;;; Linear Algebra Functions
147 ;;;;
149 (defun matrix (dim data)
150 "Args: (dim data)
151 returns a matrix of dimensions DIM initialized using sequence DATA
152 in row major order."
153 (let ((dim (coerce dim 'list))
154 (data (coerce data 'list)))
155 (make-array dim :initial-contents (split-list data (nth 1 dim)))))
157 (defun print-matrix (a &optional (stream *standard-output*))
158 "Args: (matrix &optional stream)
159 Prints MATRIX to STREAM in a nice form that is still machine readable"
160 (unless (matrixp a) (error "not a matrix - ~a" a))
161 (let ((size (min 15 (max (map-elements #'flatsize a))))) ;; FIXME: flatsize not defined
162 (format stream "#2a(~%")
163 (dolist (x (row-list a))
164 (format stream " (")
165 (let ((n (length x)))
166 (dotimes (i n)
167 (let ((y (aref x i)))
168 (cond
169 ((integerp y) (format stream "~vd" size y))
170 ((floatp y) (format stream "~vg" size y))
171 (t (format stream "~va" size y))))
172 (if (< i (- n 1)) (format stream " "))))
173 (format stream ")~%"))
174 (format stream " )~%")
175 nil))
177 (defun solve (a b)
178 "Args: (a b)
179 Solves A x = B using LU decomposition and backsolving. B can be a sequence
180 or a matrix."
181 (let ((lu (lu-decomp a)))
182 (if (matrixp b)
183 (apply #'bind-columns
184 (mapcar #'(lambda (x) (lu-solve lu x)) (column-list b)))
185 (lu-solve lu b))))
187 (defun backsolve (a b)
188 "Args: (a b)
189 Solves A x = B by backsolving, assuming A is upper triangular. B must be a
190 sequence. For use with qr-decomp."
191 (let* ((n (length b))
192 (sol (make-array n)))
193 (dotimes (i n)
194 (let* ((k (- n i 1))
195 (val (elt b k)))
196 (dotimes (j i)
197 (let ((l (- n j 1)))
198 (setq val (- val (* (aref sol l) (aref a k l))))))
199 (setf (aref sol k) (/ val (aref a k k)))))
200 (if (listp b) (coerce sol 'list) sol)))
202 (defun eigenvalues (a)
203 "Args: (a)
204 Returns list of eigenvalues of square, symmetric matrix A"
205 (first (eigen a)))
207 (defun eigenvectors (a)
208 "Args: (a)
209 Returns list of eigenvectors of square, symmetric matrix A"
210 (second (eigen a)))
212 (defun accumulate (f s)
213 "Args: (f s)
214 Accumulates elements of sequence S using binary function F.
215 (accumulate #'+ x) returns the cumulative sum of x."
216 (let* ((result (list (elt s 0)))
217 (tail result))
218 (flet ((acc (dummy x)
219 (rplacd tail (list (funcall f (first tail) x)))
220 (setf tail (cdr tail))))
221 (reduce #'acc s))
222 (if (vectorp s) (coerce result 'vector) result)))
224 (defun cumsum (x)
225 "Args: (x)
226 Returns the cumulative sum of X."
227 (accumulate #'+ x))
229 (defun combine (&rest args)
230 "Args (&rest args)
231 Returns sequence of elements of all arguments."
232 (copy-seq (element-seq args)))
234 (defun lowess (x y &key (f .25) (steps 2) (delta -1) sorted)
235 "Args: (x y &key (f .25) (steps 2) delta sorted)
236 Returns (list X YS) with YS the LOWESS fit. F is the fraction of data used for
237 each point, STEPS is the number of robust iterations. Fits for points within
238 DELTA of each other are interpolated linearly. If the X values setting SORTED
239 to T speeds up the computation."
240 (let ((x (if sorted x (sort-data x)))
241 (y (if sorted y (select y (order x))))
242 (delta (if (> delta 0.0) delta (/ (- (max x) (min x)) 50))))
243 (list x)));; (|base-lowess| x y f steps delta))))