2 ;;; Copyright (c) 2005--2007, by A.J. Rossini <blindglobe@gmail.com>
3 ;;; See COPYRIGHT file for any additional restrictions (BSD license).
4 ;;; Since 1991, ANSI was finally finished. Edited for ANSI Common Lisp.
6 ;;; XLisp-ism's removed to focus on Common Lisp. Original source from:
7 ;;;; statistics.lsp XLISP-STAT statistics functions
8 ;;;; XLISP-STAT 2.1 Copyright (c) 1990, by Luke Tierney
9 ;;;; Additions to Xlisp 2.1, Copyright (c) 1989 by David Michael Betz
10 ;;;; You may give out copies of this software; for conditions see the file
11 ;;;; COPYING included with this distribution.
15 (defpackage :lisp-stat-descriptive-statistics
18 (:export
;; descriptive stats
19 standard-deviation quantile median interquartile-range
24 ;; the following are more matrix-centric
25 covariance-matrix matrix print-matrix solve
26 backsolve eigenvalues eigenvectors accumulate cumsum combine
29 (in-package :lisp-stat-descriptive-statistics
)
32 ;;;; Basic Summary Statistics
35 (defun standard-deviation (x)
37 Returns the standard deviation of the elements x. Vector reducing."
38 (let ((n (count-elements x
))
40 (sqrt (* (mean (* r r
)) (/ n
(- n
1))))))
44 Returns the P-th quantile(s) of sequence X. P can be a number or a sequence."
45 (let* ((x (sort-data x
))
50 (/ (+ (select x low
) (select x high
)) 2)))
54 Returns the median of the elements of X."
57 (defun interquartile-range (x)
59 Returns the interquartile range of the elements of X."
60 (apply #'-
(quantile x
'(0.75
0.25))))
64 Returns the five number summary (min, 1st quartile, medinan, 3rd quartile,
65 max) of the elements X."
66 (quantile x
'(0 .25 .5 .75 1)))
68 (defun covariance-matrix (&rest args
)
70 Returns the sample covariance matrix of the data columns in ARGS. ARGS may
71 consist of lists, vectors or matrices."
72 (let ((columns (apply #'append
74 (if (matrixp x
) (column-list x
) (list x
)))
76 (/ (cross-product (apply #'bind-columns
77 (- columns
(mapcar #'mean columns
))))
78 (- (length (car columns
)) 1))))
80 ;;;; Sampling / Resampling
82 (defun sample (x ssize
&optional replace
)
83 "Args: (x n &optional (replace nil))
84 Returns a list of a random sample of size N from sequence X drawn with or
88 (x (if (consp x
) (coerce x
'vector
) (copy-vector x
)))
91 (dotimes (i ssize result
)
92 (let ((j (if replace
(random n
) (+ i
(random (- n i
))))))
93 (setf result
(cons (aref x j
) result
))
94 (unless replace
;; swap elements i and j
95 (let ((temp (aref x i
)))
96 (setf (aref x i
) (aref x j
))
97 (setf (aref x j
) temp
))))))))
101 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
103 ;;;; Sorting Functions
105 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
109 Returns a sequence with the numbers or strings in the sequence X in order."
110 (flet ((less (x y
) (if (numberp x
) (< x y
) (string-lessp x y
))))
111 (stable-sort (copy-seq (compound-data-seq x
)) #'less
)))
115 Returns a sequence of the indices of elements in the sequence of numbers
116 or strings X in order."
117 (let* ((seq (compound-data-seq x
))
118 (type (if (consp seq
) 'list
'vector
))
120 (flet ((entry (x) (setf i
(+ i
1)) (list x i
))
124 (if (numberp x
) (< x y
) (string-lessp x y
)))))
125 (let ((sorted-seq (stable-sort (map type
#'entry seq
) #'less
)))
126 (map type
#'second sorted-seq
)))))
128 ;; this isn't destructive -- do we document destructive only, or any
132 Returns a sequence with the elements of the list or array of numbers or
133 strings X replaced by their ranks."
134 (let ((ranked-seq (order (order x
))))
135 (make-compound-data (compound-data-shape x
) ranked-seq
)))
139 Returns the mean of the elements x. Vector reducing."
142 (labels ((add-to-mean (x)
143 (let ((count+1 (+ count
1.0)))
144 (setf mean
(+ (* (/ count count
+1) mean
) (* (/ count
+1) x
)))
145 (setf count count
+1)))
149 (let ((seq (compound-data-seq x
)))
152 (if (numberp x
) (add-to-mean x
) (find-mean x
)))
153 (let ((n (length seq
)))
156 (let ((x (aref seq i
)))
159 (find-mean x
))))))))))
164 ;;;; Linear Algebra Functions
167 (defun matrix (dim data
)
169 returns a matrix of dimensions DIM initialized using sequence DATA
171 (let ((dim (coerce dim
'list
))
172 (data (coerce data
'list
)))
173 (make-array dim
:initial-contents
(split-list data
(nth 1 dim
)))))
175 (defun print-matrix (a &optional
(stream *standard-output
*))
176 "Args: (matrix &optional stream)
177 Prints MATRIX to STREAM in a nice form that is still machine readable"
178 (unless (matrixp a
) (error "not a matrix - ~a" a
))
179 (let ((size (min 15 (max (map-elements #'flatsize a
)))))
180 (format stream
"#2a(~%")
181 (dolist (x (row-list a
))
183 (let ((n (length x
)))
185 (let ((y (aref x i
)))
187 ((integerp y
) (format stream
"~vd" size y
))
188 ((floatp y
) (format stream
"~vg" size y
))
189 (t (format stream
"~va" size y
))))
190 (if (< i
(- n
1)) (format stream
" "))))
191 (format stream
")~%"))
192 (format stream
" )~%")
197 Solves A x = B using LU decomposition and backsolving. B can be a sequence
199 (let ((lu (lu-decomp a
)))
201 (apply #'bind-columns
202 (mapcar #'(lambda (x) (lu-solve lu x
)) (column-list b
)))
205 (defun backsolve (a b
)
207 Solves A x = B by backsolving, assuming A is upper triangular. B must be a
208 sequence. For use with qr-decomp."
209 (let* ((n (length b
))
210 (sol (make-array n
)))
216 (setq val
(- val
(* (aref sol l
) (aref a k l
))))))
217 (setf (aref sol k
) (/ val
(aref a k k
)))))
218 (if (listp b
) (coerce sol
'list
) sol
)))
220 (defun eigenvalues (a)
222 Returns list of eigenvalues of square, symmetric matrix A"
225 (defun eigenvectors (a)
227 Returns list of eigenvectors of square, symmetric matrix A"
230 (defun accumulate (f s
)
232 Accumulates elements of sequence S using binary function F.
233 (accumulate #'+ x) returns the cumulative sum of x."
234 (let* ((result (list (elt s
0)))
236 (flet ((acc (dummy x
)
237 (rplacd tail
(list (funcall f
(first tail
) x
)))
238 (setf tail
(cdr tail
))))
240 (if (vectorp s
) (coerce result
'vector
) result
)))
244 Returns the cumulative sum of X."
247 (defun combine (&rest args
)
249 Returns sequence of elements of all arguments."
250 (copy-seq (element-seq args
)))
252 (defun lowess (x y
&key
(f .25) (steps 2) (delta -
1) sorted
)
253 "Args: (x y &key (f .25) (steps 2) delta sorted)
254 Returns (list X YS) with YS the LOWESS fit. F is the fraction of data used for
255 each point, STEPS is the number of robust iterations. Fits for points within
256 DELTA of each other are interpolated linearly. If the X values setting SORTED
257 to T speeds up the computation."
258 (let ((x (if sorted x
(sort-data x
)))
259 (y (if sorted y
(select y
(order x
))))
260 (delta (if (> delta
0.0) delta
(/ (- (max x
) (min x
)) 50))))
261 (list x
)));; (|base-lowess| x y f steps delta))))