Syncing to home
[tsl.git] / statistics.lsp
blob4428428051c3476c72649f8de7824f05edae45c7
1 ;;; -*- mode: lisp -*-
2 ;;; Copyright (c) 2005--2007, by A.J. Rossini <blindglobe@gmail.com>
3 ;;; See COPYRIGHT file for any additional restrictions (BSD license).
4 ;;; Since 1991, ANSI was finally finished. Edited for ANSI Common Lisp.
6 ;;; XLisp-ism's removed to focus on Common Lisp. Original source from:
7 ;;;; statistics.lsp XLISP-STAT statistics functions
8 ;;;; XLISP-STAT 2.1 Copyright (c) 1990, by Luke Tierney
9 ;;;; Additions to Xlisp 2.1, Copyright (c) 1989 by David Michael Betz
10 ;;;; You may give out copies of this software; for conditions see the file
11 ;;;; COPYING included with this distribution.
13 (defpackage :lisp-stat-descriptive-statistics
14 (:use :common-lisp
15 :lisp-stat-data)
16 (:export ;; descriptive stats
17 standard-deviation quantile median interquartile-range
18 fivnum
21 ;; the following are more matrix-centric
22 covariance-matrix matrix print-matrix solve
23 backsolve eigenvalues eigenvectors accumulate cumsum combine
24 lowess))
26 (:in-package :lisp-stat-descriptive-statistics)
28 ;;;;
29 ;;;; Basic Summary Statistics
30 ;;;;
32 (defun standard-deviation (x)
33 "Args: (x)
34 Returns the standard deviation of the elements x. Vector reducing."
35 (let ((n (count-elements x))
36 (r (- x (mean x))))
37 (sqrt (* (mean (* r r)) (/ n (- n 1))))))
39 (defun quantile (x p)
40 "Args: (x p)
41 Returns the P-th quantile(s) of sequence X. P can be a number or a sequence."
42 (let* ((x (sort-data x))
43 (n (length x))
44 (np (* p (- n 1)))
45 (low (floor np))
46 (high (ceiling np)))
47 (/ (+ (select x low) (select x high)) 2)))
49 (defun median (x)
50 "Args: (x)
51 Returns the median of the elements of X."
52 (quantile x 0.5))
54 (defun interquartile-range (x)
55 "Args: (number-data)
56 Returns the interquartile range of the elements of X."
57 (apply #'- (quantile x '(0.75 0.25))))
59 (defun fivnum (x)
60 "Args: (number-data)
61 Returns the five number summary (min, 1st quartile, medinan, 3rd quartile,
62 max) of the elements X."
63 (quantile x '(0 .25 .5 .75 1)))
65 (defun covariance-matrix (&rest args)
66 "Args: (&rest args)
67 Returns the sample covariance matrix of the data columns in ARGS. ARGS may
68 consist of lists, vectors or matrices."
69 (let ((columns (apply #'append
70 (mapcar #'(lambda (x)
71 (if (matrixp x) (column-list x) (list x)))
72 args))))
73 (/ (cross-product (apply #'bind-columns
74 (- columns (mapcar #'mean columns))))
75 (- (length (car columns)) 1))))
78 ;;;;
79 ;;;; Linear Algebra Functions
80 ;;;;
82 (defun matrix (dim data)
83 "Args: (dim data)
84 returns a matrix of dimensions DIM initialized using sequence DATA
85 in row major order."
86 (let ((dim (coerce dim 'list))
87 (data (coerce data 'list)))
88 (make-array dim :initial-contents (split-list data (nth 1 dim)))))
90 (defun print-matrix (a &optional (stream *standard-output*))
91 "Args: (matrix &optional stream)
92 Prints MATRIX to STREAM in a nice form that is still machine readable"
93 (unless (matrixp a) (error "not a matrix - ~a" a))
94 (let ((size (min 15 (max (map-elements #'flatsize a)))))
95 (format stream "#2a(~%")
96 (dolist (x (row-list a))
97 (format stream " (")
98 (let ((n (length x)))
99 (dotimes (i n)
100 (let ((y (aref x i)))
101 (cond
102 ((integerp y) (format stream "~vd" size y))
103 ((floatp y) (format stream "~vg" size y))
104 (t (format stream "~va" size y))))
105 (if (< i (- n 1)) (format stream " "))))
106 (format stream ")~%"))
107 (format stream " )~%")
108 nil))
110 (defun solve (a b)
111 "Args: (a b)
112 Solves A x = B using LU decomposition and backsolving. B can be a sequence
113 or a matrix."
114 (let ((lu (lu-decomp a)))
115 (if (matrixp b)
116 (apply #'bind-columns
117 (mapcar #'(lambda (x) (lu-solve lu x)) (column-list b)))
118 (lu-solve lu b))))
120 (defun backsolve (a b)
121 "Args: (a b)
122 Solves A x = B by backsolving, assuming A is upper triangular. B must be a
123 sequence. For use with qr-decomp."
124 (let* ((n (length b))
125 (sol (make-array n)))
126 (dotimes (i n)
127 (let* ((k (- n i 1))
128 (val (elt b k)))
129 (dotimes (j i)
130 (let ((l (- n j 1)))
131 (setq val (- val (* (aref sol l) (aref a k l))))))
132 (setf (aref sol k) (/ val (aref a k k)))))
133 (if (listp b) (coerce sol 'list) sol)))
135 (defun eigenvalues (a)
136 "Args: (a)
137 Returns list of eigenvalues of square, symmetric matrix A"
138 (first (eigen a)))
140 (defun eigenvectors (a)
141 "Args: (a)
142 Returns list of eigenvectors of square, symmetric matrix A"
143 (second (eigen a)))
145 (defun accumulate (f s)
146 "Args: (f s)
147 Accumulates elements of sequence S using binary function F.
148 (accumulate #'+ x) returns the cumulative sum of x."
149 (let* ((result (list (elt s 0)))
150 (tail result))
151 (flet ((acc (dummy x)
152 (rplacd tail (list (funcall f (first tail) x)))
153 (setf tail (cdr tail))))
154 (reduce #'acc s))
155 (if (vectorp s) (coerce result 'vector) result)))
157 (defun cumsum (x)
158 "Args: (x)
159 Returns the cumulative sum of X."
160 (accumulate #'+ x))
162 (defun combine (&rest args)
163 "Args (&rest args)
164 Returns sequence of elements of all arguments."
165 (copy-seq (element-seq args)))
167 (defun lowess (x y &key (f .25) (steps 2) (delta -1) sorted)
168 "Args: (x y &key (f .25) (steps 2) delta sorted)
169 Returns (list X YS) with YS the LOWESS fit. F is the fraction of data used for
170 each point, STEPS is the number of robust iterations. Fits for points within
171 DELTA of each other are interpolated linearly. If the X values setting SORTED
172 to T speeds up the computation."
173 (let ((x (if sorted x (sort-data x)))
174 (y (if sorted y (select y (order x))))
175 (delta (if (> delta 0.0) delta (/ (- (max x) (min x)) 50))))
176 (list x)));; (|base-lowess| x y f steps delta))))