remove sequence and combine into compound completely.
[CommonLispStat.git] / statistics.lsp
blobeb4ae74d3b5296983f5500fdbfe50e8463ddf1db
1 ;;; -*- mode: lisp -*-
2 ;;; Copyright (c) 2005--2007, by A.J. Rossini <blindglobe@gmail.com>
3 ;;; See COPYRIGHT file for any additional restrictions (BSD license).
4 ;;; Since 1991, ANSI was finally finished. Edited for ANSI Common Lisp.
6 ;;; XLisp-ism's removed to focus on Common Lisp. Original source from:
7 ;;;; statistics.lsp XLISP-STAT statistics functions
8 ;;;; XLISP-STAT 2.1 Copyright (c) 1990, by Luke Tierney
9 ;;;; Additions to Xlisp 2.1, Copyright (c) 1989 by David Michael Betz
10 ;;;; You may give out copies of this software; for conditions see the file
11 ;;;; COPYING included with this distribution.
13 (in-package :cl-user)
15 (defpackage :lisp-stat-descriptive-statistics
16 (:use :common-lisp
17 :lisp-stat-data
18 :lisp-stat-math
19 :lisp-stat-compound-data
20 :lisp-stat-matrix
21 :lisp-stat-linalg-data
22 :lisp-stat-linalg
24 :lisp-stat-basics
26 (:shadowing-import-from :lisp-stat-math
27 expt + - * / ** mod rem abs 1+ 1- log exp sqrt sin cos tan
28 asin acos atan sinh cosh tanh asinh acosh atanh float random
29 truncate floor ceiling round minusp zerop plusp evenp oddp
30 < <= = /= >= > complex conjugate realpart imagpart phase
31 min max logand logior logxor lognot ffloor fceiling
32 ftruncate fround signum cis)
33 (:export ;; descriptive stats
34 standard-deviation quantile median interquartile-range
35 fivnum
37 sample
39 ;; the following are more matrix-centric
40 covariance-matrix matrix print-matrix solve
41 backsolve eigenvalues eigenvectors accumulate cumsum combine
42 lowess))
44 (in-package :lisp-stat-descriptive-statistics)
46 ;;;;
47 ;;;; Basic Summary Statistics
48 ;;;;
50 (defun standard-deviation (x)
51 "Args: (x)
52 Returns the standard deviation of the elements x. Vector reducing."
53 (let ((n (count-elements x))
54 (r (- x (mean x))))
55 (sqrt (* (mean (* r r)) (/ n (- n 1))))))
58 ;; FIXME the following assume that we are using the vector based functions
59 (defun quantile (x p)
60 "Args: (x p)
61 Returns the P-th quantile(s) of sequence X. P can be a number or a sequence."
62 (let* ((x (sort-data x))
63 (n (length x))
64 (np (* p (- n 1)))
65 (low (floor np))
66 (high (ceiling np)))
67 (/ (+ (select x low) (select x high)) 2)))
69 (defun median (x)
70 "Args: (x)
71 Returns the median of the elements of X."
72 (quantile x 0.5))
74 (defun interquartile-range (x)
75 "Args: (number-data)
76 Returns the interquartile range of the elements of X."
77 (reduce #'- (quantile x '(0.75 0.25))))
79 (defun fivnum (x)
80 "Args: (number-data)
81 Returns the five number summary (min, 1st quartile, medinan, 3rd quartile,
82 max) of the elements X."
83 (quantile x '(0 .25 .5 .75 1)))
85 (defun covariance-matrix (&rest args)
86 "Args: (&rest args)
87 Returns the sample covariance matrix of the data columns in ARGS. ARGS may
88 consist of lists, vectors or matrices."
89 (let ((columns (apply #'append
90 (mapcar #'(lambda (x)
91 (if (matrixp x) (column-list x) (list x)))
92 args))))
93 (/ (cross-product (apply #'bind-columns
94 (- columns (mapcar #'mean columns))))
95 (- (length (car columns)) 1))))
97 ;;;; Sampling / Resampling
99 (defun sample (x ssize &optional replace)
100 "Args: (x n &optional (replace nil))
101 Returns a list of a random sample of size N from sequence X drawn with or
102 without replacement."
103 (check-sequence x)
104 (let ((n (length x))
105 (x (if (consp x) (coerce x 'vector) (copy-vector x)))
106 (result nil))
107 (if (< 0 n)
108 (dotimes (i ssize result)
109 (let ((j (if replace (random n) (+ i (random (- n i))))))
110 (setf result (cons (aref x j) result))
111 (unless replace ;; swap elements i and j
112 (let ((temp (aref x i)))
113 (setf (aref x i) (aref x j))
114 (setf (aref x j) temp))))))))
118 (defun mean (x)
119 "Args: (x)
120 Returns the mean of the elements x. Vector reducing."
121 (let ((mean 0.0)
122 (count 0.0))
123 (labels ((add-to-mean (x)
124 (let ((count+1 (+ count 1.0)))
125 (setf mean (+ (* (/ count count+1) mean) (* (/ count+1) x)))
126 (setf count count+1)))
127 (find-mean (x)
128 (if (numberp x)
129 (add-to-mean x)
130 (let ((seq (compound-data-seq x)))
131 (if (consp seq)
132 (dolist (x seq)
133 (if (numberp x) (add-to-mean x) (find-mean x)))
134 (let ((n (length seq)))
135 (dotimes (i n)
136 (declare (fixnum i))
137 (let ((x (aref seq i)))
138 (if (numberp x)
139 (add-to-mean x)
140 (find-mean x))))))))))
141 (find-mean x)
142 mean)))
144 ;;;;
145 ;;;; Linear Algebra Functions
146 ;;;;
148 (defun matrix (dim data)
149 "Args: (dim data)
150 returns a matrix of dimensions DIM initialized using sequence DATA
151 in row major order."
152 (let ((dim (coerce dim 'list))
153 (data (coerce data 'list)))
154 (make-array dim :initial-contents (split-list data (nth 1 dim)))))
156 (defun print-matrix (a &optional (stream *standard-output*))
157 "Args: (matrix &optional stream)
158 Prints MATRIX to STREAM in a nice form that is still machine readable"
159 (unless (matrixp a) (error "not a matrix - ~a" a))
160 (let ((size (min 15 (max (map-elements #'flatsize a))))) ;; FIXME: flatsize not defined
161 (format stream "#2a(~%")
162 (dolist (x (row-list a))
163 (format stream " (")
164 (let ((n (length x)))
165 (dotimes (i n)
166 (let ((y (aref x i)))
167 (cond
168 ((integerp y) (format stream "~vd" size y))
169 ((floatp y) (format stream "~vg" size y))
170 (t (format stream "~va" size y))))
171 (if (< i (- n 1)) (format stream " "))))
172 (format stream ")~%"))
173 (format stream " )~%")
174 nil))
176 (defun solve (a b)
177 "Args: (a b)
178 Solves A x = B using LU decomposition and backsolving. B can be a sequence
179 or a matrix."
180 (let ((lu (lu-decomp a)))
181 (if (matrixp b)
182 (apply #'bind-columns
183 (mapcar #'(lambda (x) (lu-solve lu x)) (column-list b)))
184 (lu-solve lu b))))
186 (defun backsolve (a b)
187 "Args: (a b)
188 Solves A x = B by backsolving, assuming A is upper triangular. B must be a
189 sequence. For use with qr-decomp."
190 (let* ((n (length b))
191 (sol (make-array n)))
192 (dotimes (i n)
193 (let* ((k (- n i 1))
194 (val (elt b k)))
195 (dotimes (j i)
196 (let ((l (- n j 1)))
197 (setq val (- val (* (aref sol l) (aref a k l))))))
198 (setf (aref sol k) (/ val (aref a k k)))))
199 (if (listp b) (coerce sol 'list) sol)))
201 (defun eigenvalues (a)
202 "Args: (a)
203 Returns list of eigenvalues of square, symmetric matrix A"
204 (first (eigen a)))
206 (defun eigenvectors (a)
207 "Args: (a)
208 Returns list of eigenvectors of square, symmetric matrix A"
209 (second (eigen a)))
211 (defun accumulate (f s)
212 "Args: (f s)
213 Accumulates elements of sequence S using binary function F.
214 (accumulate #'+ x) returns the cumulative sum of x."
215 (let* ((result (list (elt s 0)))
216 (tail result))
217 (flet ((acc (dummy x)
218 (rplacd tail (list (funcall f (first tail) x)))
219 (setf tail (cdr tail))))
220 (reduce #'acc s))
221 (if (vectorp s) (coerce result 'vector) result)))
223 (defun cumsum (x)
224 "Args: (x)
225 Returns the cumulative sum of X."
226 (accumulate #'+ x))
228 (defun combine (&rest args)
229 "Args (&rest args)
230 Returns sequence of elements of all arguments."
231 (copy-seq (element-seq args)))
233 (defun lowess (x y &key (f .25) (steps 2) (delta -1) sorted)
234 "Args: (x y &key (f .25) (steps 2) delta sorted)
235 Returns (list X YS) with YS the LOWESS fit. F is the fraction of data used for
236 each point, STEPS is the number of robust iterations. Fits for points within
237 DELTA of each other are interpolated linearly. If the X values setting SORTED
238 to T speeds up the computation."
239 (let ((x (if sorted x (sort-data x)))
240 (y (if sorted y (select y (order x))))
241 (delta (if (> delta 0.0) delta (/ (- (max x) (min x)) 50))))
242 (list x)));; (|base-lowess| x y f steps delta))))