in-lined docs about what mode is intending
[CommonLispStat.git] / statistics.lsp
blob3407ddd54b13b41d31b640074983c178f9e031ff
1 ;;; -*- mode: lisp -*-
2 ;;; Copyright (c) 2005--2007, by A.J. Rossini <blindglobe@gmail.com>
3 ;;; See COPYRIGHT file for any additional restrictions (BSD license).
4 ;;; Since 1991, ANSI was finally finished. Edited for ANSI Common Lisp.
6 ;;; XLisp-ism's removed to focus on Common Lisp. Original source from:
7 ;;;; statistics.lsp XLISP-STAT statistics functions
8 ;;;; XLISP-STAT 2.1 Copyright (c) 1990, by Luke Tierney
9 ;;;; Additions to Xlisp 2.1, Copyright (c) 1989 by David Michael Betz
10 ;;;; You may give out copies of this software; for conditions see the file
11 ;;;; COPYING included with this distribution.
13 (in-package :cl-user)
15 (defpackage :lisp-stat-descriptive-statistics
16 (:use :common-lisp
17 :lisp-stat-data)
18 (:export ;; descriptive stats
19 standard-deviation quantile median interquartile-range
20 fivnum
22 sample
24 ;; the following are more matrix-centric
25 covariance-matrix matrix print-matrix solve
26 backsolve eigenvalues eigenvectors accumulate cumsum combine
27 lowess))
29 (in-package :lisp-stat-descriptive-statistics)
31 ;;;;
32 ;;;; Basic Summary Statistics
33 ;;;;
35 (defun standard-deviation (x)
36 "Args: (x)
37 Returns the standard deviation of the elements x. Vector reducing."
38 (let ((n (count-elements x))
39 (r (- x (mean x))))
40 (sqrt (* (mean (* r r)) (/ n (- n 1))))))
42 (defun quantile (x p)
43 "Args: (x p)
44 Returns the P-th quantile(s) of sequence X. P can be a number or a sequence."
45 (let* ((x (sort-data x))
46 (n (length x))
47 (np (* p (- n 1)))
48 (low (floor np))
49 (high (ceiling np)))
50 (/ (+ (select x low) (select x high)) 2)))
52 (defun median (x)
53 "Args: (x)
54 Returns the median of the elements of X."
55 (quantile x 0.5))
57 (defun interquartile-range (x)
58 "Args: (number-data)
59 Returns the interquartile range of the elements of X."
60 (apply #'- (quantile x '(0.75 0.25))))
62 (defun fivnum (x)
63 "Args: (number-data)
64 Returns the five number summary (min, 1st quartile, medinan, 3rd quartile,
65 max) of the elements X."
66 (quantile x '(0 .25 .5 .75 1)))
68 (defun covariance-matrix (&rest args)
69 "Args: (&rest args)
70 Returns the sample covariance matrix of the data columns in ARGS. ARGS may
71 consist of lists, vectors or matrices."
72 (let ((columns (apply #'append
73 (mapcar #'(lambda (x)
74 (if (matrixp x) (column-list x) (list x)))
75 args))))
76 (/ (cross-product (apply #'bind-columns
77 (- columns (mapcar #'mean columns))))
78 (- (length (car columns)) 1))))
80 ;;;; Sampling / Resampling
82 (defun sample (x ssize &optional replace)
83 "Args: (x n &optional (replace nil))
84 Returns a list of a random sample of size N from sequence X drawn with or
85 without replacement."
86 (check-sequence x)
87 (let ((n (length x))
88 (x (if (consp x) (coerce x 'vector) (copy-vector x)))
89 (result nil))
90 (if (< 0 n)
91 (dotimes (i ssize result)
92 (let ((j (if replace (random n) (+ i (random (- n i))))))
93 (setf result (cons (aref x j) result))
94 (unless replace ;; swap elements i and j
95 (let ((temp (aref x i)))
96 (setf (aref x i) (aref x j))
97 (setf (aref x j) temp))))))))
101 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
102 ;;;;
103 ;;;; Sorting Functions
104 ;;;;
105 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
107 (defun sort-data (x)
108 "Args: (sequence)
109 Returns a sequence with the numbers or strings in the sequence X in order."
110 (flet ((less (x y) (if (numberp x) (< x y) (string-lessp x y))))
111 (stable-sort (copy-seq (compound-data-seq x)) #'less)))
113 (defun order (x)
114 "Args (x)
115 Returns a sequence of the indices of elements in the sequence of numbers
116 or strings X in order."
117 (let* ((seq (compound-data-seq x))
118 (type (if (consp seq) 'list 'vector))
119 (i -1))
120 (flet ((entry (x) (setf i (+ i 1)) (list x i))
121 (less (a b)
122 (let ((x (first a))
123 (y (first b)))
124 (if (numberp x) (< x y) (string-lessp x y)))))
125 (let ((sorted-seq (stable-sort (map type #'entry seq) #'less)))
126 (map type #'second sorted-seq)))))
128 ;; this isn't destructive -- do we document destructive only, or any
129 ;; variant?
130 (defun rank (x)
131 "Args (x)
132 Returns a sequence with the elements of the list or array of numbers or
133 strings X replaced by their ranks."
134 (let ((ranked-seq (order (order x))))
135 (make-compound-data (compound-data-shape x) ranked-seq)))
137 (defun mean (x)
138 "Args: (x)
139 Returns the mean of the elements x. Vector reducing."
140 (let ((mean 0.0)
141 (count 0.0))
142 (labels ((add-to-mean (x)
143 (let ((count+1 (+ count 1.0)))
144 (setf mean (+ (* (/ count count+1) mean) (* (/ count+1) x)))
145 (setf count count+1)))
146 (find-mean (x)
147 (if (numberp x)
148 (add-to-mean x)
149 (let ((seq (compound-data-seq x)))
150 (if (consp seq)
151 (dolist (x seq)
152 (if (numberp x) (add-to-mean x) (find-mean x)))
153 (let ((n (length seq)))
154 (dotimes (i n)
155 (declare (fixnum i))
156 (let ((x (aref seq i)))
157 (if (numberp x)
158 (add-to-mean x)
159 (find-mean x))))))))))
160 (find-mean x)
161 mean)))
163 ;;;;
164 ;;;; Linear Algebra Functions
165 ;;;;
167 (defun matrix (dim data)
168 "Args: (dim data)
169 returns a matrix of dimensions DIM initialized using sequence DATA
170 in row major order."
171 (let ((dim (coerce dim 'list))
172 (data (coerce data 'list)))
173 (make-array dim :initial-contents (split-list data (nth 1 dim)))))
175 (defun print-matrix (a &optional (stream *standard-output*))
176 "Args: (matrix &optional stream)
177 Prints MATRIX to STREAM in a nice form that is still machine readable"
178 (unless (matrixp a) (error "not a matrix - ~a" a))
179 (let ((size (min 15 (max (map-elements #'flatsize a)))))
180 (format stream "#2a(~%")
181 (dolist (x (row-list a))
182 (format stream " (")
183 (let ((n (length x)))
184 (dotimes (i n)
185 (let ((y (aref x i)))
186 (cond
187 ((integerp y) (format stream "~vd" size y))
188 ((floatp y) (format stream "~vg" size y))
189 (t (format stream "~va" size y))))
190 (if (< i (- n 1)) (format stream " "))))
191 (format stream ")~%"))
192 (format stream " )~%")
193 nil))
195 (defun solve (a b)
196 "Args: (a b)
197 Solves A x = B using LU decomposition and backsolving. B can be a sequence
198 or a matrix."
199 (let ((lu (lu-decomp a)))
200 (if (matrixp b)
201 (apply #'bind-columns
202 (mapcar #'(lambda (x) (lu-solve lu x)) (column-list b)))
203 (lu-solve lu b))))
205 (defun backsolve (a b)
206 "Args: (a b)
207 Solves A x = B by backsolving, assuming A is upper triangular. B must be a
208 sequence. For use with qr-decomp."
209 (let* ((n (length b))
210 (sol (make-array n)))
211 (dotimes (i n)
212 (let* ((k (- n i 1))
213 (val (elt b k)))
214 (dotimes (j i)
215 (let ((l (- n j 1)))
216 (setq val (- val (* (aref sol l) (aref a k l))))))
217 (setf (aref sol k) (/ val (aref a k k)))))
218 (if (listp b) (coerce sol 'list) sol)))
220 (defun eigenvalues (a)
221 "Args: (a)
222 Returns list of eigenvalues of square, symmetric matrix A"
223 (first (eigen a)))
225 (defun eigenvectors (a)
226 "Args: (a)
227 Returns list of eigenvectors of square, symmetric matrix A"
228 (second (eigen a)))
230 (defun accumulate (f s)
231 "Args: (f s)
232 Accumulates elements of sequence S using binary function F.
233 (accumulate #'+ x) returns the cumulative sum of x."
234 (let* ((result (list (elt s 0)))
235 (tail result))
236 (flet ((acc (dummy x)
237 (rplacd tail (list (funcall f (first tail) x)))
238 (setf tail (cdr tail))))
239 (reduce #'acc s))
240 (if (vectorp s) (coerce result 'vector) result)))
242 (defun cumsum (x)
243 "Args: (x)
244 Returns the cumulative sum of X."
245 (accumulate #'+ x))
247 (defun combine (&rest args)
248 "Args (&rest args)
249 Returns sequence of elements of all arguments."
250 (copy-seq (element-seq args)))
252 (defun lowess (x y &key (f .25) (steps 2) (delta -1) sorted)
253 "Args: (x y &key (f .25) (steps 2) delta sorted)
254 Returns (list X YS) with YS the LOWESS fit. F is the fraction of data used for
255 each point, STEPS is the number of robust iterations. Fits for points within
256 DELTA of each other are interpolated linearly. If the X values setting SORTED
257 to T speeds up the computation."
258 (let ((x (if sorted x (sort-data x)))
259 (y (if sorted y (select y (order x))))
260 (delta (if (> delta 0.0) delta (/ (- (max x) (min x)) 50))))
261 (list x)));; (|base-lowess| x y f steps delta))))