whitespace (lispstat.asd) and stat model move (lispstat.asd and others).
[CommonLispStat.git] / src / stat-models / regression-clos.lisp
blob45ec801585f33f65d62139410d40e72bf8937ddb
1 ;;; -*- mode: lisp -*-
3 ;;; File: data.lisp
4 ;;; Author: AJ Rossini <blindglobe@gmail.com>
5 ;;; Copyright: (c)2007, AJ Rossini. BSD, LLGPL, or GPLv2, depending
6 ;;; on how it arrives.
7 ;;; Purpose: data package for lispstat
8 ;;; Time-stamp: <2008-03-11 19:18:48 user>
9 ;;; Creation: <2008-03-11 19:18:34 user>
11 ;;; What is this talk of 'release'? Klingons do not make software
12 ;;; 'releases'. Our software 'escapes', leaving a bloody trail of
13 ;;; designers and quality assurance people in its wake.
15 ;;; This organization and structure is new to the 21st Century
16 ;;; version.
18 ;;; regression-clos.lisp
19 ;;;
20 ;;; redoing regression in a CLOS based framework.
21 ;;; See regression.lsp for basis of work.
23 (in-package :cl-user)
25 (defpackage :lisp-stat-regression-linear-clos
26 (:use :common-lisp
27 :clem )
29 (:export regression-model regression-model-obj x y intercept sweep-matrix
30 basis weights included total-sum-of-squares residual-sum-of-squares
31 predictor-names response-name case-labels))
33 (in-package :lisp-stat-regression-linear-clos)
35 ;;; Regresion Model CLOS
37 (defclass regression-model-clos (statistical-model)
38 ((x :initform nil :initarg :x :accessor x)
39 (y :initform nil :initarg :y :accessor y)
40 (included :initform nil :initarg :y :accessor y)
41 (total-sum-of-squares :initform nil :initarg :y :accessor y)
42 (residual-sum-of-squares :initform nil :initarg :y :accessor y)
43 (predictor-names :initform nil :initarg :y :accessor y)
44 (response-name :initform nil :initarg :y :accessor y)
45 (case-labels :initform nil :initarg :y :accessor y)
46 (needs-computing :initform T :initarg :compute? :accessor compute?)
47 (:documentation "Normal Linear Regression Model through CLOS."))
49 (defmethod fit ((regr-inst regression-model-clos))
50 "Args: (regr-inst regressino-model-clos)
52 Returns a fitted regression model object. To examine the model further
53 assign the result to a variable and send it messages. Example (data
54 are in file absorbtion.lsp in the sample data directory/folder):
55 (def fit-m (fit (new 'regression-model-clos (list iron aluminum) absorbtion)))
56 (print fit-m)
57 (plot fit-m :feature 'residuals)"
58 (let ((x (cond
59 ((matrixp x) x)
60 ((vectorp x) (list x))
61 ((and (consp x) (numberp (car x))) (list x))
62 (t x)))
63 (m (send regression-model-proto :new)))
64 (send m :x (if (matrixp x) x (apply #'bind-columns x)))
65 (setf (slot-value 'y) y)
66 (send m :intercept intercept)
67 (send m :weights weights)
68 (send m :included included)
69 (send m :predictor-names predictor-names)
70 (send m :response-name response-name)
71 (send m :case-labels case-labels)
72 (if print (send m :display))
73 m))
75 (defmeth regression-model-proto :isnew ()
76 (send self :needs-computing t))
78 (defmeth regression-model-proto :save ()
79 "Message args: ()
80 Returns an expression that will reconstruct the regression model."
81 `(regression-model ',(send self :x)
82 ',(send self :y)
83 :intercept ',(send self :intercept)
84 :weights ',(send self :weights)
85 :included ',(send self :included)
86 :predictor-names ',(send self :predictor-names)
87 :response-name ',(send self :response-name)
88 :case-labels ',(send self :case-labels)))
90 ;;; Computing and Display Methods
92 (defmeth regression-model-proto :compute ()
93 "Message args: ()
94 Recomputes the estimates. For internal use by other messages"
95 (let* ((included (if-else (send self :included) 1 0))
96 (x (send self :x))
97 (y (send self :y))
98 (intercept (send self :intercept))
99 (weights (send self :weights))
100 (w (if weights (* included weights) included))
101 (m (make-sweep-matrix x y w))
102 (n (array-dimension x 1))
103 (p (- (array-dimension m 0) 1))
104 (tss (aref m p p))
105 (tol (* 0.001 (reduce #'* (mapcar #'standard-deviation (column-list x)))))
106 ;; (tol (* 0.001 (apply #'* (mapcar #'standard-deviation (column-list x)))))
107 (sweep-result
108 (if intercept
109 (sweep-operator m (iseq 1 n) tol)
110 (sweep-operator m (iseq 0 n) (cons 0.0 tol)))))
111 (setf (slot-value 'sweep-matrix) (first sweep-result))
112 (setf (slot-value 'total-sum-of-squares) tss)
113 (setf (slot-value 'residual-sum-of-squares)
114 (aref (first sweep-result) p p))
115 (setf (slot-value 'basis)
116 (let ((b (remove 0 (second sweep-result))))
117 (if b (- (reduce #'- (reverse b)) 1)
118 (error "no columns could be swept"))))))
120 (defmeth regression-model-proto :needs-computing (&optional set)
121 ;;(declare (ignore self))
122 (if set (setf (slot-value 'sweep-matrix) nil))
123 (null (slot-value 'sweep-matrix)))
125 (defmeth regression-model-proto :display ()
126 "Message args: ()
127 Prints the least squares regression summary. Variables not used in the fit
128 are marked as aliased."
129 (let ((coefs (coerce (send self :coef-estimates) 'list))
130 (se-s (send self :coef-standard-errors))
131 (x (send self :x))
132 (p-names (send self :predictor-names)))
133 (if (send self :weights)
134 (format t "~%Weighted Least Squares Estimates:~2%")
135 (format t "~%Least Squares Estimates:~2%"))
136 (when (send self :intercept)
137 (format t "Constant ~10f ~A~%"
138 (car coefs) (list (car se-s)))
139 (setf coefs (cdr coefs))
140 (setf se-s (cdr se-s)))
141 (dotimes (i (array-dimension x 1))
142 (cond
143 ((member i (send self :basis))
144 (format t "~22a ~10f ~A~%"
145 (select p-names i) (car coefs) (list (car se-s)))
146 (setf coefs (cdr coefs) se-s (cdr se-s)))
147 (t (format t "~22a aliased~%" (select p-names i)))))
148 (format t "~%")
149 (format t "R Squared: ~10f~%" (send self :r-squared))
150 (format t "Sigma hat: ~10f~%" (send self :sigma-hat))
151 (format t "Number of cases: ~10d~%" (send self :num-cases))
152 (if (/= (send self :num-cases) (send self :num-included))
153 (format t "Number of cases used: ~10d~%" (send self :num-included)))
154 (format t "Degrees of freedom: ~10d~%" (send self :df))
155 (format t "~%")))
157 ;;; Slot accessors and mutators
159 (defmeth regression-model-proto :x (&optional new-x)
160 "Message args: (&optional new-x)
161 With no argument returns the x matrix as supplied to m. With an argument
162 NEW-X sets the x matrix to NEW-X and recomputes the estimates."
163 (when (and new-x (matrixp new-x))
164 (setf (slot-value 'x) new-x)
165 (send self :needs-computing t))
166 (slot-value 'x))
168 (defmeth regression-model-proto :y (&optional new-y)
169 "Message args: (&optional new-y)
170 With no argument returns the y sequence as supplied to m. With an argument
171 NEW-Y sets the y sequence to NEW-Y and recomputes the estimates."
172 (when (and new-y (or (matrixp new-y) (sequencep new-y)))
173 (setf (slot-value 'y) new-y)
174 (send self :needs-computing t))
175 (slot-value 'y))
177 (defmeth regression-model-proto :intercept (&optional (val nil set))
178 "Message args: (&optional new-intercept)
179 With no argument returns T if the model includes an intercept term, nil if
180 not. With an argument NEW-INTERCEPT the model is changed to include or
181 exclude an intercept, according to the value of NEW-INTERCEPT."
182 (when set
183 (setf (slot-value 'intercept) val)
184 (send self :needs-computing t))
185 (slot-value 'intercept))
187 (defmeth regression-model-proto :weights (&optional (new-w nil set))
188 "Message args: (&optional new-w)
189 With no argument returns the weight sequence as supplied to m; NIL means
190 an unweighted model. NEW-W sets the weights sequence to NEW-W and
191 recomputes the estimates."
192 (when set
193 (setf (slot-value 'weights) new-w)
194 (send self :needs-computing t))
195 (slot-value 'weights))
197 (defmeth regression-model-proto :total-sum-of-squares ()
198 "Message args: ()
199 Returns the total sum of squares around the mean."
200 (if (send self :needs-computing) (send self :compute))
201 (slot-value 'total-sum-of-squares))
203 (defmeth regression-model-proto :residual-sum-of-squares ()
204 "Message args: ()
205 Returns the residual sum of squares for the model."
206 (if (send self :needs-computing) (send self :compute))
207 (slot-value 'residual-sum-of-squares))
209 (defmeth regression-model-proto :basis ()
210 "Message args: ()
211 Returns the indices of the variables used in fitting the model."
212 (if (send self :needs-computing) (send self :compute))
213 (slot-value 'basis))
215 (defmeth regression-model-proto :sweep-matrix ()
216 "Message args: ()
217 Returns the swept sweep matrix. For internal use"
218 (if (send self :needs-computing) (send self :compute))
219 (slot-value 'sweep-matrix))
221 (defmeth regression-model-proto :included (&optional new-included)
222 "Message args: (&optional new-included)
223 With no argument, NIL means a case is not used in calculating estimates, and non-nil means it is used. NEW-INCLUDED is a sequence of length of y of nil and t to select cases. Estimates are recomputed."
224 (when (and new-included
225 (= (length new-included) (send self :num-cases)))
226 (setf (slot-value 'included) (copy-seq new-included))
227 (send self :needs-computing t))
228 (if (slot-value 'included)
229 (slot-value 'included)
230 (repeat t (send self :num-cases))))
232 (defmeth regression-model-proto :predictor-names (&optional (names nil set))
233 "Message args: (&optional (names nil set))
234 With no argument returns the predictor names. NAMES sets the names."
235 (if set (setf (slot-value 'predictor-names) (mapcar #'string names)))
236 (let ((p (array-dimension (send self :x) 1))
237 (p-names (slot-value 'predictor-names)))
238 (if (not (and p-names (= (length p-names) p)))
239 (setf (slot-value 'predictor-names)
240 (mapcar #'(lambda (a) (format nil "Variable ~a" a))
241 (iseq 0 (- p 1))))))
242 (slot-value 'predictor-names))
244 (defmeth regression-model-proto :response-name (&optional (name "Y" set))
245 "Message args: (&optional name)
246 With no argument returns the response name. NAME sets the name."
247 ;;(declare (ignore self))
248 (if set (setf (slot-value 'response-name) (if name (string name) "Y")))
249 (slot-value 'response-name))
251 (defmeth regression-model-proto :case-labels (&optional (labels nil set))
252 "Message args: (&optional labels)
253 With no argument returns the case-labels. LABELS sets the labels."
254 (if set (setf (slot-value 'case-labels)
255 (if labels
256 (mapcar #'string labels)
257 (mapcar #'(lambda (x) (format nil "~d" x))
258 (iseq 0 (- (send self :num-cases) 1))))))
259 (slot-value 'case-labels))
262 ;;; Other Methods
263 ;;; None of these methods access any slots directly.
266 (defmeth regression-model-proto :num-cases ()
267 "Message args: ()
268 Returns the number of cases in the model."
269 (length (send self :y)))
271 (defmeth regression-model-proto :num-included ()
272 "Message args: ()
273 Returns the number of cases used in the computations."
274 (sum (if-else (send self :included) 1 0)))
276 (defmeth regression-model-proto :num-coefs ()
277 "Message args: ()
278 Returns the number of coefficients in the fit model (including the
279 intercept if the model includes one)."
280 (if (send self :intercept)
281 (+ 1 (length (send self :basis)))
282 (length (send self :basis))))
284 (defmeth regression-model-proto :df ()
285 "Message args: ()
286 Returns the number of degrees of freedom in the model."
287 (- (send self :num-included) (send self :num-coefs)))
289 (defmeth regression-model-proto :x-matrix ()
290 "Message args: ()
291 Returns the X matrix for the model, including a column of 1's, if
292 appropriate. Columns of X matrix correspond to entries in basis."
293 (let ((m (select (send self :x)
294 (iseq 0 (- (send self :num-cases) 1))
295 (send self :basis))))
296 (if (send self :intercept)
297 (bind-columns (repeat 1 (send self :num-cases)) m)
298 m)))
300 (defmeth regression-model-proto :leverages ()
301 "Message args: ()
302 Returns the diagonal elements of the hat matrix."
303 (let* ((weights (send self :weights))
304 (x (send self :x-matrix))
305 (raw-levs
306 (matmult (* (matmult x (send self :xtxinv)) x)
307 (repeat 1 (send self :num-coefs)))))
308 (if weights (* weights raw-levs) raw-levs)))
310 (defmeth regression-model-proto :fit-values ()
311 "Message args: ()
312 Returns the fitted values for the model."
313 (matmult (send self :x-matrix) (send self :coef-estimates)))
315 (defmeth regression-model-proto :raw-residuals ()
316 "Message args: ()
317 Returns the raw residuals for a model."
318 (- (send self :y) (send self :fit-values)))
320 (defmeth regression-model-proto :residuals ()
321 "Message args: ()
322 Returns the raw residuals for a model without weights. If the model
323 includes weights the raw residuals times the square roots of the weights
324 are returned."
325 (let ((raw-residuals (send self :raw-residuals))
326 (weights (send self :weights)))
327 (if weights (* (sqrt weights) raw-residuals) raw-residuals)))
329 (defmeth regression-model-proto :sum-of-squares ()
330 "Message args: ()
331 Returns the error sum of squares for the model."
332 (send self :residual-sum-of-squares))
334 (defmeth regression-model-proto :sigma-hat ()
335 "Message args: ()
336 Returns the estimated standard deviation of the deviations about the
337 regression line."
338 (let ((ss (send self :sum-of-squares))
339 (df (send self :df)))
340 (if (/= df 0) (sqrt (/ ss df)))))
342 ;; for models without an intercept the 'usual' formula for R^2 can give
343 ;; negative results; hence the max.
344 (defmeth regression-model-proto :r-squared ()
345 "Message args: ()
346 Returns the sample squared multiple correlation coefficient, R squared, for
347 the regression."
348 (max (- 1 (/ (send self :sum-of-squares) (send self :total-sum-of-squares)))
351 (defmeth regression-model-proto :coef-estimates ()
352 "Message args: ()
353 Returns the OLS (ordinary least squares) estimates of the regression
354 coefficients. Entries beyond the intercept correspond to entries in basis."
355 (let ((n (array-dimension (send self :x) 1))
356 (indices (if (send self :intercept)
357 (cons 0 (+ 1 (send self :basis)))
358 (+ 1 (send self :basis))))
359 (m (send self :sweep-matrix)))
360 (coerce (compound-data-seq (select m (+ 1 n) indices)) 'list)))
362 (defmeth regression-model-proto :xtxinv ()
363 "Message args: ()
364 Returns ((X^T) X)^(-1) or ((X^T) W X)^(-1)."
365 (let ((indices (if (send self :intercept)
366 (cons 0 (1+ (send self :basis)))
367 (1+ (send self :basis)))))
368 (select (send self :sweep-matrix) indices indices)))
370 (defmeth regression-model-proto :coef-standard-errors ()
371 "Message args: ()
372 Returns estimated standard errors of coefficients. Entries beyond the
373 intercept correspond to entries in basis."
374 (let ((s (send self :sigma-hat)))
375 (if s (* (send self :sigma-hat) (sqrt (diagonal (send self :xtxinv)))))))
377 (defmeth regression-model-proto :studentized-residuals ()
378 "Message args: ()
379 Computes the internally studentized residuals for included cases and externally studentized residuals for excluded cases."
380 (let ((res (send self :residuals))
381 (lev (send self :leverages))
382 (sig (send self :sigma-hat))
383 (inc (send self :included)))
384 (if-else inc
385 (/ res (* sig (sqrt (pmax .00001 (- 1 lev)))))
386 (/ res (* sig (sqrt (+ 1 lev)))))))
388 (defmeth regression-model-proto :externally-studentized-residuals ()
389 "Message args: ()
390 Computes the externally studentized residuals."
391 (let* ((res (send self :studentized-residuals))
392 (df (send self :df)))
393 (if-else (send self :included)
394 (* res (sqrt (/ (- df 1) (- df (^ res 2)))))
395 res)))
397 (defmeth regression-model-proto :cooks-distances ()
398 "Message args: ()
399 Computes Cook's distances."
400 (let ((lev (send self :leverages))
401 (res (/ (^ (send self :studentized-residuals) 2)
402 (send self :num-coefs))))
403 (if-else (send self :included) (* res (/ lev (- 1 lev) )) (* res lev))))
406 (defun plot-points (x y &rest args)
407 "FIXME!!"
408 (declare (ignore x y args))
409 (error "Graphics not implemented yet."))
411 ;; Can not plot points yet!!
412 (defmeth regression-model-proto :plot-residuals (&optional x-values)
413 "Message args: (&optional x-values)
414 Opens a window with a plot of the residuals. If X-VALUES are not supplied
415 the fitted values are used. The plot can be linked to other plots with the
416 link-views function. Returns a plot object."
417 (plot-points (if x-values x-values (send self :fit-values))
418 (send self :residuals)
419 :title "Residual Plot"
420 :point-labels (send self :case-labels)))
422 (defmeth regression-model-proto :plot-bayes-residuals
423 (&optional x-values)
424 "Message args: (&optional x-values)
425 Opens a window with a plot of the standardized residuals and two standard
426 error bars for the posterior distribution of the actual deviations from the
427 line. See Chaloner and Brant. If X-VALUES are not supplied the fitted values
428 are used. The plot can be linked to other plots with the link-views function.
429 Returns a plot object."
430 (let* ((r (/ (send self :residuals) (send self :sigma-hat)))
431 (d (* 2 (sqrt (send self :leverages))))
432 (low (- r d))
433 (high (+ r d))
434 (x-values (if x-values x-values (send self :fit-values)))
435 (p (plot-points x-values r
436 :title "Bayes Residual Plot"
437 :point-labels (send self :case-labels))))
438 (map 'list #'(lambda (a b c d) (send p :plotline a b c d nil))
439 x-values low x-values high)
440 (send p :adjust-to-data)