ignore binary objects
[tsl.git] / statistics.lsp
blobf4545bafcc7525189d5cbcf530242b44b08ee649
1 ;;; -*- mode: lisp -*-
2 ;;; Copyright (c) 2005--2007, by A.J. Rossini <blindglobe@gmail.com>
3 ;;; See COPYRIGHT file for any additional restrictions (BSD license).
4 ;;; Since 1991, ANSI was finally finished. Edited for ANSI Common Lisp.
6 ;;; XLisp-ism's removed to focus on Common Lisp.
8 ;;;; statistics.lsp XLISP-STAT statistics functions
9 ;;;; XLISP-STAT 2.1 Copyright (c) 1990, by Luke Tierney
10 ;;;; Additions to Xlisp 2.1, Copyright (c) 1989 by David Michael Betz
11 ;;;; You may give out copies of this software; for conditions see the file
12 ;;;; COPYING included with this distribution.
13 ;;;;
15 (defpackage :lisp-stat
16 (:use common-lisp)
17 ;;(:import-from :lisp-stat-basics |base-lowess|)
18 (:export open-file-dialog read-data-file read-data-columns load-data
19 load-example *variables* *ask-on-redefine* def variables savevar
20 undef standard-deviation quantile median interquartile-range
21 fivnum covariance-matrix difference rseq matrix print-matrix solve
22 backsolve eigenvalues eigenvectors accumulate cumsum combine
23 lowess))
26 (:in-package :lisp-stat)
29 ;;;;
30 ;;;; Data File Reading
31 ;;;;
33 (defun count-file-columns (fname)
34 "Args: (fname)
35 Returns the number of lisp items on the first nonblank line of file FNAME."
36 (with-open-file (f fname)
37 (if f
38 (let ((line (do ((line (read-line f) (read-line f)))
39 ((or (null line) (< 0 (length line))) line))))
40 (if line
41 (with-input-from-string (s line)
42 (do ((n 0 (+ n 1)) (eof (gensym)))
43 ((eq eof (read s nil eof)) n))))))))
45 #+xlisp (defvar *xlisptable* *readtable*)
47 (if (not (fboundp 'open-file-dialog))
48 #+dialogs
49 (defun open-file-dialog (&optional set)
50 (get-string-dialog "Enter a data file name:"))
51 #-dialogs
52 (defun open-file-dialog (&optional set)
53 (error "You must provide a file name explicitly")))
55 (defun read-data-file (&optional (file (open-file-dialog t)))
56 "Args: (file)
57 Returns a list of all lisp objects in FILE. FILE can be a string or a symbol,
58 in which case the symbol'f print name is used."
59 (if file
60 (let ((eof (gensym)))
61 (with-open-file (f file)
62 (if f
63 (do* ((r (read f nil eof) (read f nil eof))
64 (x (list nil))
65 (tail x (cdr tail)))
66 ((eq r eof) (cdr x))
67 (setf (cdr tail) (list r))))))))
69 ;;; New definition to avoid stack size limit in apply
70 (defun read-data-columns (&optional (file (open-file-dialog t))
71 (cols (if file
72 (count-file-columns file))))
73 "Args: (&optional file cols)
74 Reads the data in FILE as COLS columns and returns a list of lists representing the columns."
75 (if (and file cols)
76 (transpose (split-list (read-data-file file) cols))))
78 #+unix
79 (defun load-data (file)
80 "Args: (file)
81 Read in data file from the data examples library."
82 (if (load (format nil "~aData/~a" *default-path* file))
84 (load (format nil "~aExamples/~a" *default-path* file))))
86 #+unix
87 (defun load-example (file)
88 "Args: (file)
89 Read in lisp example file from the examples library."
90 (if (load (format nil "~aExamples/~a" *default-path* file))
92 (load (format nil "~aData/~a" *default-path* file))))
93 #+macintosh
94 (defun load-data (s) (require s (concatenate 'string ":Data:" s)))
95 #+macintosh
96 (defun load-example (s) (require s (concatenate 'string ":Examples:" s)))
98 #+msdos
99 (defun load-data (file)
100 "Args: (file)
101 Read in data file from the data examples library."
102 (load (format nil "~aData\\~a" *default-path* file)))
104 #+msdos
105 (defun load-example (file)
106 "Args: (file)
107 Read in lisp example file from the examples library."
108 (load (format nil "~aExamples\\~a" *default-path* file)))
110 ;;;;
111 ;;;; Listing and Saving Variables and Functions
112 ;;;;
114 (defvar *variables* nil)
115 (defvar *ask-on-redefine* nil)
117 (defmacro def (symbol value)
118 "Syntax: (def var form)
119 VAR is not evaluated and must be a symbol. Assigns the value of FORM to
120 VAR and adds VAR to the list *VARIABLES* of def'ed variables. Returns VAR.
121 If VAR is already bound and the global variable *ASK-ON-REDEFINE*
122 is not nil then you are asked if you want to redefine the variable."
123 `(unless (and *ask-on-redefine*
124 (boundp ',symbol)
125 (not (y-or-n-p "Variable has a value. Redefine?")))
126 (pushnew ',symbol *variables*)
127 (setf ,symbol ,value)
128 ',symbol))
130 (defun variables-list ()
131 (mapcar #'intern (sort-data (mapcar #'string *variables*))))
133 (defun variables ()
134 "Args:()
135 Returns a list of the names of all def'ed variables to STREAM"
136 (if *variables*
137 (mapcar #'intern (sort-data (mapcar #'string *variables*)))))
139 (defun savevar (vars file)
140 "Args: (vars file-name-root)
141 VARS is a symbol or a list of symbols. FILE-NAME-ROOT is a string (or a symbol
142 whose print name is used) not endinf in .lsp. The VARS and their current values
143 are written to the file FILE-NAME-ROOT.lsp in a form suitable for use with the
144 load command."
145 (with-open-file (f (strcat (string file) ".lsp") :direction :output)
146 (let ((vars (if (consp vars) vars (list vars))))
147 (flet ((save-one (x)
148 (let ((v (symbol-value x)))
149 (if (objectp v)
150 (format f "(def ~s ~s)~%" x (send v :save))
151 (format f "(def ~s '~s)~%" x v)))))
152 (mapcar #'save-one vars))
153 vars)))
155 (defun undef (v)
156 "Args: (v)
157 If V is the symbol of a defined variable the variable it is unbound and
158 removed from the list of defined variables. If V is a list of variable
159 names each is unbound and removed. Returns V."
160 (dolist (s (if (listp v) v (list v)))
161 (when (member s *variables*)
162 (setq *variables* (delete s *variables*))
163 (makunbound s)))
167 ;;;;
168 ;;;; Basic Summary Statistics
169 ;;;;
171 (defun standard-deviation (x)
172 "Args: (x)
173 Returns the standard deviation of the elements x. Vector reducing."
174 (let ((n (count-elements x))
175 (r (- x (mean x))))
176 (sqrt (* (mean (* r r)) (/ n (- n 1))))))
178 (defun quantile (x p)
179 "Args: (x p)
180 Returns the P-th quantile(s) of sequence X. P can be a number or a sequence."
181 (let* ((x (sort-data x))
182 (n (length x))
183 (np (* p (- n 1)))
184 (low (floor np))
185 (high (ceiling np)))
186 (/ (+ (select x low) (select x high)) 2)))
188 (defun median (x)
189 "Args: (x)
190 Returns the median of the elements of X."
191 (quantile x 0.5))
193 (defun interquartile-range (x)
194 "Args: (number-data)
195 Returns the interquartile range of the elements of X."
196 (apply #'- (quantile x '(0.75 0.25))))
198 (defun fivnum (x)
199 "Args: (number-data)
200 Returns the five number summary (min, 1st quartile, medinan, 3rd quartile,
201 max) of the elements X."
202 (quantile x '(0 .25 .5 .75 1)))
204 (defun covariance-matrix (&rest args)
205 "Args: (&rest args)
206 Returns the sample covariance matrix of the data columns in ARGS. ARGS may
207 consist of lists, vectors or matrices."
208 (let ((columns (apply #'append
209 (mapcar #'(lambda (x)
210 (if (matrixp x) (column-list x) (list x)))
211 args))))
212 (/ (cross-product (apply #'bind-columns
213 (- columns (mapcar #'mean columns))))
214 (- (length (car columns)) 1))))
216 ;;;;
217 ;;;; Basic Sequence Operations
218 ;;;;
220 (defun difference (x)
221 "Args: (x)
222 Returns differences for a sequence X."
223 (let ((n (length x)))
224 (- (select x (iseq 1 (1- n))) (select x (iseq 0 (- n 2))))))
226 (defun rseq (a b num)
227 "Args: (a b num)
228 Returns a list of NUM equally spaced points starting at A and ending at B."
229 (+ a (* (iseq 0 (1- num)) (/ (float (- b a)) (1- num)))))
232 ;;;;
233 ;;;; Linear Algebra Functions
234 ;;;;
236 (defun matrix (dim data)
237 "Args: (dim data)
238 returns a matrix of dimensions DIM initialized using sequence DATA
239 in row major order."
240 (let ((dim (coerce dim 'list))
241 (data (coerce data 'list)))
242 (make-array dim :initial-contents (split-list data (nth 1 dim)))))
244 (defun print-matrix (a &optional (stream *standard-output*))
245 "Args: (matrix &optional stream)
246 Prints MATRIX to STREAM in a nice form that is still machine readable"
247 (unless (matrixp a) (error "not a matrix - ~a" a))
248 (let ((size (min 15 (max (map-elements #'flatsize a)))))
249 (format stream "#2a(~%")
250 (dolist (x (row-list a))
251 (format stream " (")
252 (let ((n (length x)))
253 (dotimes (i n)
254 (let ((y (aref x i)))
255 (cond
256 ((integerp y) (format stream "~vd" size y))
257 ((floatp y) (format stream "~vg" size y))
258 (t (format stream "~va" size y))))
259 (if (< i (- n 1)) (format stream " "))))
260 (format stream ")~%"))
261 (format stream " )~%")
262 nil))
264 (defun solve (a b)
265 "Args: (a b)
266 Solves A x = B using LU decomposition and backsolving. B can be a sequence
267 or a matrix."
268 (let ((lu (lu-decomp a)))
269 (if (matrixp b)
270 (apply #'bind-columns
271 (mapcar #'(lambda (x) (lu-solve lu x)) (column-list b)))
272 (lu-solve lu b))))
274 (defun backsolve (a b)
275 "Args: (a b)
276 Solves A x = B by backsolving, assuming A is upper triangular. B must be a
277 sequence. For use with qr-decomp."
278 (let* ((n (length b))
279 (sol (make-array n)))
280 (dotimes (i n)
281 (let* ((k (- n i 1))
282 (val (elt b k)))
283 (dotimes (j i)
284 (let ((l (- n j 1)))
285 (setq val (- val (* (aref sol l) (aref a k l))))))
286 (setf (aref sol k) (/ val (aref a k k)))))
287 (if (listp b) (coerce sol 'list) sol)))
289 (defun eigenvalues (a)
290 "Args: (a)
291 Returns list of eigenvalues of square, symmetric matrix A"
292 (first (eigen a)))
294 (defun eigenvectors (a)
295 "Args: (a)
296 Returns list of eigenvectors of square, symmetric matrix A"
297 (second (eigen a)))
299 (defun accumulate (f s)
300 "Args: (f s)
301 Accumulates elements of sequence S using binary function F.
302 (accumulate #'+ x) returns the cumulative sum of x."
303 (let* ((result (list (elt s 0)))
304 (tail result))
305 (flet ((acc (dummy x)
306 (rplacd tail (list (funcall f (first tail) x)))
307 (setf tail (cdr tail))))
308 (reduce #'acc s))
309 (if (vectorp s) (coerce result 'vector) result)))
311 (defun cumsum (x)
312 "Args: (x)
313 Returns the cumulative sum of X."
314 (accumulate #'+ x))
316 (defun combine (&rest args)
317 "Args (&rest args)
318 Returns sequence of elements of all arguments."
319 (copy-seq (element-seq args)))
321 (defun lowess (x y &key (f .25) (steps 2) (delta -1) sorted)
322 "Args: (x y &key (f .25) (steps 2) delta sorted)
323 Returns (list X YS) with YS the LOWESS fit. F is the fraction of data used for
324 each point, STEPS is the number of robust iterations. Fits for points within
325 DELTA of each other are interpolated linearly. If the X values setting SORTED
326 to T speeds up the computation."
327 (let ((x (if sorted x (sort-data x)))
328 (y (if sorted y (select y (order x))))
329 (delta (if (> delta 0.0) delta (/ (- (max x) (min x)) 50))))
330 (list x)));; (|base-lowess| x y f steps delta))))