small text cleanup
[tsl.git] / statistics.lsp
blobd26fb1d69b902b62a73ef8bf6c268d41061cb5b1
1 ;;;;
2 ;;;; statistics.lsp XLISP-STAT statistics functions
3 ;;;; XLISP-STAT 2.1 Copyright (c) 1990, by Luke Tierney
4 ;;;; Additions to Xlisp 2.1, Copyright (c) 1989 by David Michael Betz
5 ;;;; You may give out copies of this software; for conditions see the file
6 ;;;; COPYING included with this distribution.
7 ;;;;
9 (provide "statistics")
11 #+:CLtL2
12 (in-package lisp-stat)
13 #-:CLtL2
14 (in-package 'lisp-stat)
16 (export '(open-file-dialog read-data-file read-data-columns load-data
17 load-example *variables* *ask-on-redefine* def variables savevar
18 undef standard-deviation quantile median interquartile-range
19 fivnum covariance-matrix difference rseq matrix print-matrix solve
20 backsolve eigenvalues eigenvectors accumulate cumsum combine
21 lowess))
23 (import 'ls-basics::|base-lowess|)
25 ;;;;
26 ;;;; Data File Reading
27 ;;;;
29 (defun count-file-columns (fname)
30 "Args: (fname)
31 Returns the number of lisp items on the first nonblank line of file FNAME."
32 (with-open-file (f fname)
33 (if f
34 (let ((line (do ((line (read-line f) (read-line f)))
35 ((or (null line) (< 0 (length line))) line))))
36 (if line
37 (with-input-from-string (s line)
38 (do ((n 0 (+ n 1)) (eof (gensym)))
39 ((eq eof (read s nil eof)) n))))))))
41 #+xlisp (defvar *xlisptable* *readtable*)
43 (if (not (fboundp 'open-file-dialog))
44 #+dialogs
45 (defun open-file-dialog (&optional set)
46 (get-string-dialog "Enter a data file name:"))
47 #-dialogs
48 (defun open-file-dialog (&optional set)
49 (error "You must provide a file name explicitly")))
51 (defun read-data-file (&optional (file (open-file-dialog t)))
52 "Args: (file)
53 Returns a list of all lisp objects in FILE. FILE can be a string or a symbol,
54 in which case the symbol'f print name is used."
55 (if file
56 (let ((eof (gensym)))
57 (with-open-file (f file)
58 (if f
59 (do* ((r (read f nil eof) (read f nil eof))
60 (x (list nil))
61 (tail x (cdr tail)))
62 ((eq r eof) (cdr x))
63 (setf (cdr tail) (list r))))))))
65 ;;; New definition to avoid stack size limit in apply
66 (defun read-data-columns (&optional (file (open-file-dialog t))
67 (cols (if file
68 (count-file-columns file))))
69 "Args: (&optional file cols)
70 Reads the data in FILE as COLS columns and returns a list of lists representing the columns."
71 (if (and file cols)
72 (transpose (split-list (read-data-file file) cols))))
74 #+unix
75 (defun load-data (file)
76 "Args: (file)
77 Read in data file from the data examples library."
78 (if (load (format nil "~aData/~a" *default-path* file))
80 (load (format nil "~aExamples/~a" *default-path* file))))
82 #+unix
83 (defun load-example (file)
84 "Args: (file)
85 Read in lisp example file from the examples library."
86 (if (load (format nil "~aExamples/~a" *default-path* file))
88 (load (format nil "~aData/~a" *default-path* file))))
89 #+macintosh
90 (defun load-data (s) (require s (concatenate 'string ":Data:" s)))
91 #+macintosh
92 (defun load-example (s) (require s (concatenate 'string ":Examples:" s)))
94 #+msdos
95 (defun load-data (file)
96 "Args: (file)
97 Read in data file from the data examples library."
98 (load (format nil "~aData\\~a" *default-path* file)))
100 #+msdos
101 (defun load-example (file)
102 "Args: (file)
103 Read in lisp example file from the examples library."
104 (load (format nil "~aExamples\\~a" *default-path* file)))
106 ;;;;
107 ;;;; Listing and Saving Variables and Functions
108 ;;;;
110 (defvar *variables* nil)
111 (defvar *ask-on-redefine* nil)
113 (defmacro def (symbol value)
114 "Syntax: (def var form)
115 VAR is not evaluated and must be a symbol. Assigns the value of FORM to
116 VAR and adds VAR to the list *VARIABLES* of def'ed variables. Returns VAR.
117 If VAR is already bound and the global variable *ASK-ON-REDEFINE*
118 is not nil then you are asked if you want to redefine the variable."
119 `(unless (and *ask-on-redefine*
120 (boundp ',symbol)
121 (not (y-or-n-p "Variable has a value. Redefine?")))
122 (pushnew ',symbol *variables*)
123 (setf ,symbol ,value)
124 ',symbol))
126 (defun variables-list ()
127 (mapcar #'intern (sort-data (mapcar #'string *variables*))))
129 (defun variables ()
130 "Args:()
131 Returns a list of the names of all def'ed variables to STREAM"
132 (if *variables*
133 (mapcar #'intern (sort-data (mapcar #'string *variables*)))))
135 (defun savevar (vars file)
136 "Args: (vars file-name-root)
137 VARS is a symbol or a list of symbols. FILE-NAME-ROOT is a string (or a symbol
138 whose print name is used) not endinf in .lsp. The VARS and their current values
139 are written to the file FILE-NAME-ROOT.lsp in a form suitable for use with the
140 load command."
141 (with-open-file (f (strcat (string file) ".lsp") :direction :output)
142 (let ((vars (if (consp vars) vars (list vars))))
143 (flet ((save-one (x)
144 (let ((v (symbol-value x)))
145 (if (objectp v)
146 (format f "(def ~s ~s)~%" x (send v :save))
147 (format f "(def ~s '~s)~%" x v)))))
148 (mapcar #'save-one vars))
149 vars)))
151 (defun undef (v)
152 "Args: (v)
153 If V is the symbol of a defined variable the variable it is unbound and
154 removed from the list of defined variables. If V is a list of variable
155 names each is unbound and removed. Returns V."
156 (dolist (s (if (listp v) v (list v)))
157 (when (member s *variables*)
158 (setq *variables* (delete s *variables*))
159 (makunbound s)))
163 ;;;;
164 ;;;; Basic Summary Statistics
165 ;;;;
167 (defun standard-deviation (x)
168 "Args: (x)
169 Returns the standard deviation of the elements x. Vector reducing."
170 (let ((n (count-elements x))
171 (r (- x (mean x))))
172 (sqrt (* (mean (* r r)) (/ n (- n 1))))))
174 (defun quantile (x p)
175 "Args: (x p)
176 Returns the P-th quantile(s) of sequence X. P can be a number or a sequence."
177 (let* ((x (sort-data x))
178 (n (length x))
179 (np (* p (- n 1)))
180 (low (floor np))
181 (high (ceiling np)))
182 (/ (+ (select x low) (select x high)) 2)))
184 (defun median (x)
185 "Args: (x)
186 Returns the median of the elements of X."
187 (quantile x 0.5))
189 (defun interquartile-range (x)
190 "Args: (number-data)
191 Returns the interquartile range of the elements of X."
192 (apply #'- (quantile x '(0.75 0.25))))
194 (defun fivnum (x)
195 "Args: (number-data)
196 Returns the five number summary (min, 1st quartile, medinan, 3rd quartile,
197 max) of the elements X."
198 (quantile x '(0 .25 .5 .75 1)))
200 (defun covariance-matrix (&rest args)
201 "Args: (&rest args)
202 Returns the sample covariance matrix of the data columns in ARGS. ARGS may
203 consist of lists, vectors or matrices."
204 (let ((columns (apply #'append
205 (mapcar #'(lambda (x)
206 (if (matrixp x) (column-list x) (list x)))
207 args))))
208 (/ (cross-product (apply #'bind-columns
209 (- columns (mapcar #'mean columns))))
210 (- (length (car columns)) 1))))
212 ;;;;
213 ;;;; Basic Sequence Operations
214 ;;;;
216 (defun difference (x)
217 "Args: (x)
218 Returns differences for a sequence X."
219 (let ((n (length x)))
220 (- (select x (iseq 1 (1- n))) (select x (iseq 0 (- n 2))))))
222 (defun rseq (a b num)
223 "Args: (a b num)
224 Returns a list of NUM equally spaced points starting at A and ending at B."
225 (+ a (* (iseq 0 (1- num)) (/ (float (- b a)) (1- num)))))
228 ;;;;
229 ;;;; Linear Algebra Functions
230 ;;;;
232 (defun matrix (dim data)
233 "Args: (dim data)
234 returns a matrix of dimensions DIM initialized using sequence DATA
235 in row major order."
236 (let ((dim (coerce dim 'list))
237 (data (coerce data 'list)))
238 (make-array dim :initial-contents (split-list data (nth 1 dim)))))
240 (defun print-matrix (a &optional (stream *standard-output*))
241 "Args: (matrix &optional stream)
242 Prints MATRIX to STREAM in a nice form that is still machine readable"
243 (unless (matrixp a) (error "not a matrix - ~a" a))
244 (let ((size (min 15 (max (map-elements #'flatsize a)))))
245 (format stream "#2a(~%")
246 (dolist (x (row-list a))
247 (format stream " (")
248 (let ((n (length x)))
249 (dotimes (i n)
250 (let ((y (aref x i)))
251 (cond
252 ((integerp y) (format stream "~vd" size y))
253 ((floatp y) (format stream "~vg" size y))
254 (t (format stream "~va" size y))))
255 (if (< i (- n 1)) (format stream " "))))
256 (format stream ")~%"))
257 (format stream " )~%")
258 nil))
260 (defun solve (a b)
261 "Args: (a b)
262 Solves A x = B using LU decomposition and backsolving. B can be a sequence
263 or a matrix."
264 (let ((lu (lu-decomp a)))
265 (if (matrixp b)
266 (apply #'bind-columns
267 (mapcar #'(lambda (x) (lu-solve lu x)) (column-list b)))
268 (lu-solve lu b))))
270 (defun backsolve (a b)
271 "Args: (a b)
272 Solves A x = B by backsolving, assuming A is upper triangular. B must be a
273 sequence. For use with qr-decomp."
274 (let* ((n (length b))
275 (sol (make-array n)))
276 (dotimes (i n)
277 (let* ((k (- n i 1))
278 (val (elt b k)))
279 (dotimes (j i)
280 (let ((l (- n j 1)))
281 (setq val (- val (* (aref sol l) (aref a k l))))))
282 (setf (aref sol k) (/ val (aref a k k)))))
283 (if (listp b) (coerce sol 'list) sol)))
285 (defun eigenvalues (a)
286 "Args: (a)
287 Returns list of eigenvalues of square, symmetric matrix A"
288 (first (eigen a)))
290 (defun eigenvectors (a)
291 "Args: (a)
292 Returns list of eigenvectors of square, symmetric matrix A"
293 (second (eigen a)))
295 (defun accumulate (f s)
296 "Args: (f s)
297 Accumulates elements of sequence S using binary function F.
298 (accumulate #'+ x) returns the cumulative sum of x."
299 (let* ((result (list (elt s 0)))
300 (tail result))
301 (flet ((acc (dummy x)
302 (rplacd tail (list (funcall f (first tail) x)))
303 (setf tail (cdr tail))))
304 (reduce #'acc s))
305 (if (vectorp s) (coerce result 'vector) result)))
307 (defun cumsum (x)
308 "Args: (x)
309 Returns the cumulative sum of X."
310 (accumulate #'+ x))
312 (defun combine (&rest args)
313 "Args (&rest args)
314 Returns sequence of elements of all arguments."
315 (copy-seq (element-seq args)))
317 (defun lowess (x y &key (f .25) (steps 2) (delta -1) sorted)
318 "Args: (x y &key (f .25) (steps 2) delta sorted)
319 Returns (list X YS) with YS the LOWESS fit. F is the fraction of data used for
320 each point, STEPS is the number of robust iterations. Fits for points within
321 DELTA of each other are interpolated linearly. If the X values setting SORTED
322 to T speeds up the computation."
323 (let ((x (if sorted x (sort-data x)))
324 (y (if sorted y (select y (order x))))
325 (delta (if (> delta 0.0) delta (/ (- (max x) (min x)) 50))))
326 (list x (|base-lowess| x y f steps delta))))