2 ;;; Copyright (c) 2005--2007, by A.J. Rossini <blindglobe@gmail.com>
3 ;;; See COPYRIGHT file for any additional restrictions (BSD license).
4 ;;; Since 1991, ANSI was finally finished. Edited for ANSI Common Lisp.
6 ;;; XLisp-ism's removed to focus on Common Lisp. Original source from:
7 ;;;; statistics.lsp XLISP-STAT statistics functions
8 ;;;; XLISP-STAT 2.1 Copyright (c) 1990, by Luke Tierney
9 ;;;; Additions to Xlisp 2.1, Copyright (c) 1989 by David Michael Betz
10 ;;;; You may give out copies of this software; for conditions see the file
11 ;;;; COPYING included with this distribution.
15 (defpackage :lisp-stat-descriptive-statistics
19 :lisp-stat-compound-data
22 :lisp-stat-linalg-data
27 (:shadowing-import-from
:lisp-stat-math
28 expt
+ -
* / ** mod rem abs
1+ 1- log exp sqrt sin cos tan
29 asin acos atan sinh cosh tanh asinh acosh atanh float random
30 truncate floor ceiling round minusp zerop plusp evenp oddp
31 < <= = /= >= > complex conjugate realpart imagpart phase
32 min max logand logior logxor lognot ffloor fceiling
33 ftruncate fround signum cis
)
34 (:export
;; descriptive stats
35 standard-deviation quantile median interquartile-range
40 ;; the following are more matrix-centric
41 covariance-matrix matrix print-matrix solve
42 backsolve eigenvalues eigenvectors accumulate cumsum combine
45 (in-package :lisp-stat-descriptive-statistics
)
48 ;;;; Basic Summary Statistics
51 (defun standard-deviation (x)
53 Returns the standard deviation of the elements x. Vector reducing."
54 (let ((n (count-elements x
))
56 (sqrt (* (mean (* r r
)) (/ n
(- n
1))))))
59 ;; FIXME the following assume that we are using the vector based functions
62 Returns the P-th quantile(s) of sequence X. P can be a number or a sequence."
63 (let* ((x (sort-data x
))
68 (/ (+ (select x low
) (select x high
)) 2)))
72 Returns the median of the elements of X."
75 (defun interquartile-range (x)
77 Returns the interquartile range of the elements of X."
78 (reduce #'-
(quantile x
'(0.75
0.25))))
82 Returns the five number summary (min, 1st quartile, medinan, 3rd quartile,
83 max) of the elements X."
84 (quantile x
'(0 .25 .5 .75 1)))
86 (defun covariance-matrix (&rest args
)
88 Returns the sample covariance matrix of the data columns in ARGS. ARGS may
89 consist of lists, vectors or matrices."
90 (let ((columns (apply #'append
92 (if (matrixp x
) (column-list x
) (list x
)))
94 (/ (cross-product (apply #'bind-columns
95 (- columns
(mapcar #'mean columns
))))
96 (- (length (car columns
)) 1))))
98 ;;;; Sampling / Resampling
100 (defun sample (x ssize
&optional replace
)
101 "Args: (x n &optional (replace nil))
102 Returns a list of a random sample of size N from sequence X drawn with or
103 without replacement."
106 (x (if (consp x
) (coerce x
'vector
) (copy-vector x
)))
109 (dotimes (i ssize result
)
110 (let ((j (if replace
(random n
) (+ i
(random (- n i
))))))
111 (setf result
(cons (aref x j
) result
))
112 (unless replace
;; swap elements i and j
113 (let ((temp (aref x i
)))
114 (setf (aref x i
) (aref x j
))
115 (setf (aref x j
) temp
))))))))
121 Returns the mean of the elements x. Vector reducing."
124 (labels ((add-to-mean (x)
125 (let ((count+1 (+ count
1.0)))
126 (setf mean
(+ (* (/ count count
+1) mean
) (* (/ count
+1) x
)))
127 (setf count count
+1)))
131 (let ((seq (compound-data-seq x
)))
134 (if (numberp x
) (add-to-mean x
) (find-mean x
)))
135 (let ((n (length seq
)))
138 (let ((x (aref seq i
)))
141 (find-mean x
))))))))))
146 ;;;; Linear Algebra Functions
149 (defun matrix (dim data
)
151 returns a matrix of dimensions DIM initialized using sequence DATA
153 (let ((dim (coerce dim
'list
))
154 (data (coerce data
'list
)))
155 (make-array dim
:initial-contents
(split-list data
(nth 1 dim
)))))
157 (defun print-matrix (a &optional
(stream *standard-output
*))
158 "Args: (matrix &optional stream)
159 Prints MATRIX to STREAM in a nice form that is still machine readable"
160 (unless (matrixp a
) (error "not a matrix - ~a" a
))
161 (let ((size (min 15 (max (map-elements #'flatsize a
))))) ;; FIXME: flatsize not defined
162 (format stream
"#2a(~%")
163 (dolist (x (row-list a
))
165 (let ((n (length x
)))
167 (let ((y (aref x i
)))
169 ((integerp y
) (format stream
"~vd" size y
))
170 ((floatp y
) (format stream
"~vg" size y
))
171 (t (format stream
"~va" size y
))))
172 (if (< i
(- n
1)) (format stream
" "))))
173 (format stream
")~%"))
174 (format stream
" )~%")
179 Solves A x = B using LU decomposition and backsolving. B can be a sequence
181 (let ((lu (lu-decomp a
)))
183 (apply #'bind-columns
184 (mapcar #'(lambda (x) (lu-solve lu x
)) (column-list b
)))
187 (defun backsolve (a b
)
189 Solves A x = B by backsolving, assuming A is upper triangular. B must be a
190 sequence. For use with qr-decomp."
191 (let* ((n (length b
))
192 (sol (make-array n
)))
198 (setq val
(- val
(* (aref sol l
) (aref a k l
))))))
199 (setf (aref sol k
) (/ val
(aref a k k
)))))
200 (if (listp b
) (coerce sol
'list
) sol
)))
202 (defun eigenvalues (a)
204 Returns list of eigenvalues of square, symmetric matrix A"
207 (defun eigenvectors (a)
209 Returns list of eigenvectors of square, symmetric matrix A"
212 (defun accumulate (f s
)
214 Accumulates elements of sequence S using binary function F.
215 (accumulate #'+ x) returns the cumulative sum of x."
216 (let* ((result (list (elt s
0)))
218 (flet ((acc (dummy x
)
219 (rplacd tail
(list (funcall f
(first tail
) x
)))
220 (setf tail
(cdr tail
))))
222 (if (vectorp s
) (coerce result
'vector
) result
)))
226 Returns the cumulative sum of X."
229 (defun combine (&rest args
)
231 Returns sequence of elements of all arguments."
232 (copy-seq (element-seq args
)))
234 (defun lowess (x y
&key
(f .25) (steps 2) (delta -
1) sorted
)
235 "Args: (x y &key (f .25) (steps 2) delta sorted)
236 Returns (list X YS) with YS the LOWESS fit. F is the fraction of data used for
237 each point, STEPS is the number of robust iterations. Fits for points within
238 DELTA of each other are interpolated linearly. If the X values setting SORTED
239 to T speeds up the computation."
240 (let ((x (if sorted x
(sort-data x
)))
241 (y (if sorted y
(select y
(order x
))))
242 (delta (if (> delta
0.0) delta
(/ (- (max x
) (min x
)) 50))))
243 (list x
)));; (|base-lowess| x y f steps delta))))