start of lisp-matrix conversion. Generic functions for statistical processing.
[CommonLispStat.git] / src / describe / statistics.lsp
blob747453c89f8dd98cace1dbffb4e308b606c64df2
1 ;;; -*- mode: lisp -*-
2 ;;; Copyright (c) 2005--2009, by A.J. Rossini <blindglobe@gmail.com>
3 ;;; See COPYRIGHT file for any additional restrictions (BSD license).
4 ;;; Since 1991, ANSI was finally finished. Edited for ANSI Common Lisp.
6 ;;; XLisp-ism's removed to focus on Common Lisp. Original source from:
7 ;;;; statistics.lsp XLISP-STAT statistics functions
8 ;;;; XLISP-STAT 2.1 Copyright (c) 1990, by Luke Tierney
9 ;;;; Additions to Xlisp 2.1, Copyright (c) 1989 by David Michael Betz
10 ;;;; You may give out copies of this software; for conditions see the file
11 ;;;; COPYING included with this distribution.
13 (in-package :lisp-stat-descriptive-statistics)
15 ;;;
16 ;;; Basic Summary Statistics
17 ;;;
19 (defgeneric mean (x)
20 (:documentation "compute the mean of lists, vectors, various objects")
21 (:method ((x list))
22 (/ (reduce #'+ x)
23 (length x)))
24 (:method ((x vector-like))
25 (let ((n (nelts x))
26 (type (if (column-vector-p x) :column :row)))
27 (/ (gemm x (ones
28 (ecase type (:row 1) (:column n))
29 (ecase type (:row n) (:column 1))))
30 n))))
32 (defun mean-fn (x)
33 "Args: (x)
34 Returns the mean of the elements x. Vector reducing.
36 FIXME: Why is this so complex? When figure out, put into generic."
37 (let ((mean 0.0)
38 (count 0.0))
39 (labels ((add-to-mean (x)
40 (let ((count+1 (+ count 1.0)))
41 (setf mean (+ (* (/ count count+1) mean) (* (/ count+1) x)))
42 (setf count count+1)))
43 (find-mean (x)
44 (if (numberp x)
45 (add-to-mean x)
46 (let ((seq (compound-data-seq x)))
47 (if (consp seq)
48 (dolist (x seq)
49 (if (numberp x) (add-to-mean x) (find-mean x)))
50 (let ((n (length seq)))
51 (dotimes (i n)
52 (declare (fixnum i))
53 (let ((x (aref seq i)))
54 (if (numberp x)
55 (add-to-mean x)
56 (find-mean x))))))))))
57 (find-mean x)
58 mean)))
61 ;; We do the variance, since the SD is simply the root.
62 (defgeneric variance (x)
63 (:documentation "compute the variance of the entity X, i.e. if a
64 scalar, vector, or matrix.")
65 (:method ((x list))
66 (let ((n (length x))
67 (r (- x (mean x))))
68 (sqrt (* (mean (* r r)) (/ n (- n 1))))))
69 (:method ((x vector-like))))
71 (defgeneric standard-deviation (x)
72 (:documentation "Compute standard deivation of entity X.")
73 (:method ((x vector-like)) (sqrt (variance x)))
74 (:method ((x list))
75 ;; if elements are not numeric, error, otherwise
76 (sqrt (variance x)))
77 (:method ((x matrix-like))
78 (error "FIXME: define SD for matrix-like objects")))
80 (defun standard-deviation-fn (x)
81 "Args: (x)
82 Returns the standard deviation of the elements x. Vector reducing.
84 FIXME AJR: This should be redone as square-root of the variance, and
85 defer to that structure for computation."
86 (let ((n (count-elements x))
87 (r (- x (mean x))))
88 (sqrt (* (mean (* r r)) (/ n (- n 1))))))
91 (defgeneric quantile (x p)
92 (:documentation "Args: (x p)
93 Returns the P-th quantile(s) of sequence X. P can be a number or a sequence."))
95 (defmethod quantile ((x sequence) (p sequence))
96 (let* ((x (sort-data x))
97 (n (length x))
98 (np (* p (- n 1)))
99 (low (floor np))
100 (high (ceiling np)))
101 (/ (+ (select x low) (select x high)) 2)))
103 (defmethod quantile ((x sequence) (p real))
104 (let* ((x (sort-data x))
105 (n (length x))
106 (np (* p (- n 1)))
107 (low (floor np))
108 (high (ceiling np)))
109 (/ (+ (select x low) (select x high)) 2)))
112 ;;; things to build on top of quantiles...!
114 ;; Args: (x)
115 ;; Returns the median of the elements of X.
116 (defmacro median (x)
117 `(quantile ,x 0.5))
118 ;; (macroexpand '(median (list 1 2 3)))
119 ;; (median (list 1 2 3))
122 ;; Args: (number-data)
123 ;; Returns the interquartile range of the elements of X.
124 (defmacro interquartile-range (x)
125 `(reduce #'- (quantile ,x '(0.75 0.25))))
127 (defun fivnum (x)
128 "Args: (number-data)
129 Returns the five number summary (min, 1st quartile, medinan, 3rd quartile,
130 max) of the elements X."
131 (quantile x '(0 .25 .5 .75 1)))
133 (defun covariance-matrix (&rest args)
134 "Args: (&rest args)
135 Returns the sample covariance matrix of the data columns in ARGS. ARGS may
136 consist of lists, vectors or matrices."
137 (let ((columns (apply #'append
138 (mapcar #'(lambda (x)
139 (if (matrixp x) (column-list x) (list x)))
140 args))))
141 (/ (cross-product (apply #'bind-columns
142 (- columns (mapcar #'mean columns))))
143 (- (length (car columns)) 1))))
145 ;;;; Sampling / Resampling
147 (defun sample (x ssize &optional replace)
148 "Args: (x n &optional (replace nil))
149 Returns a list of a random sample of size N from sequence X drawn with or
150 without replacement."
151 (check-sequence x)
152 (let ((n (length x))
153 (x (if (consp x) (coerce x 'vector) (copy-vector x)))
154 (result nil))
155 (if (< 0 n)
156 (dotimes (i ssize result)
157 (let ((j (if replace (random n) (+ i (random (- n i))))))
158 (setf result (cons (aref x j) result))
159 (unless replace ;; swap elements i and j
160 (let ((temp (aref x i)))
161 (setf (aref x i) (aref x j))
162 (setf (aref x j) temp))))))))