compiles again -- fixed function calls, but need to get the generic function signatur...
[CommonLispStat.git] / src / stat-models / regression-clon.lisp
blob6919282f838318b6e523c8f454fbf9c6eb83ab54
1 ;;; -*- mode: lisp -*-
3 ;;;;
4 ;;;; regression.lsp XLISP-STAT regression model proto and methods
5 ;;;; XLISP-STAT 2.1 Copyright (c) 1990, by Luke Tierney
6 ;;;; Additions to Xlisp 2.1, Copyright (c) 1989 by David Michael Betz
7 ;;;; You may give out copies of this software; for conditions see the file
8 ;;;; COPYING included with this distribution.
9 ;;;;
10 ;;;;
11 ;;;; Incorporates modifications suggested by Sandy Weisberg.
13 (asdf:oos 'asdf:load-op 'clon)
15 (defpackage :regression-clon
16 (:use common-lisp
17 clon
18 lisp-matrix
19 lisp-stat-data-examples)
20 (:export regression-model regression-model-proto x y intercept
21 sweep-matrix basis weights included total-sum-of-squares
22 residual-sum-of-squares predictor-names response-name
23 case-labels))
25 (in-package :regression-clon)
27 ;;; Support functions
29 (defun flatten-list (lst)
30 "Flattens a list of lists into a single list. Only useful when
31 we've mucked up data. Sign of usage means poor coding!"
32 (cond ((null lst) ;; endp?
33 nil)
34 ((listp lst)
35 (append (flatten-list (car lst)) (flatten-list (cdr lst))))
37 (list lst))))
39 (defun repeat (a b)
40 "Args: (vals times)
41 Repeats VALS. If TIMES is a number and VALS is a non-null, non-array atom,
42 a list of length TIMES with all elements eq to VALS is returned. If VALS
43 is a list and TIMES is a number then VALS is appended TIMES times. If
44 TIMES is a list of numbers then VALS must be a list of equal length and
45 the simpler version of repeat is mapped down the two lists.
46 Examples: (repeat 2 5) returns (2 2 2 2 2)
47 (repeat '(1 2) 3) returns (1 2 1 2 1 2)
48 but not:
49 (repeat '(4 5 6) '(1 2 3)) returns (4 5 5 6 6 6)
50 (repeat '((4) (5 6)) '(2 3)) returns (4 4 5 6 5 6 5 6)"
51 (if (> b 2)
52 (flatten-list (append (repeat (list a) (- b 1)) (list a)))
53 a))
55 (cond ((compound-data-p b)
56 (let* ((reps (coerce (compound-data-seq (map-elements #'repeat a b))
57 'list))
58 (result (first reps))
59 (tail (last (first reps))))
60 (dolist (next (rest reps) result)
61 (when next
62 (setf (rest tail) next)
63 (setf tail (last next))))))
64 (t (let* ((a (if (compound-data-p a)
65 (coerce (compound-data-seq a) 'list)
66 (list a)))
67 (result nil))
68 (dotimes (i b result)
69 (let ((next (copy-list a)))
70 (if result (setf (rest (last next)) result))
71 (setf result next)))))))
76 ;;;; Regresion Model Prototype
78 (define-prototype regression-model-proto
79 (:documentation "port of proto to CLON for Normal Linear Regression Model.")
80 x y intercept sweep-matrix basis weights
81 included total-sum-of-squares residual-sum-of-squares
82 predictor-names response-name case-labels)
84 ;; The doc for this function string is at the limit of XLISP's string
85 ;; constant size - making it longer may cause problems
86 (defun regression-model (x y &key
87 (intercept T)
88 (print T)
89 weights
90 (included (repeat t (length y)))
91 predictor-names
92 response-name
93 case-labels)
94 "Args: (x y &key (intercept T) (print T) weights
95 included predictor-names response-name case-labels)
96 X - list of independent variables or X matrix
97 Y - dependent variable.
98 INTERCEPT - T to include (default), NIL for no intercept
99 PRINT - if not NIL print summary information
100 WEIGHTS - if supplied should be the same length as Y; error variances are
101 assumed to be inversely proportional to WEIGHTS
102 PREDICTOR-NAMES
103 RESPONSE-NAME
104 CASE-LABELS - sequences of strings or symbols.
105 INCLUDED - if supplied should be the same length as Y, with elements nil
106 to skip a in computing estimates (but not in residual analysis).
107 Returns a regression model object. To examine the model further assign the
108 result to a variable and send it messages.
109 Example (data are in file absorbtion.lsp in the sample data directory/folder):
110 (def m (regression-model (list iron aluminum) absorbtion))
111 (send m :help)
112 (send m :plot-residuals)"
113 (check-type x matrix-like)
114 (let ((m (clone =regression-model-proto=)))
115 (setf (field-value x m) x)
116 (setf (field-value y m) y)
117 (send nil :intercept m intercept)
118 (send nil :weights m weights)
119 (send nil :included m included)
120 (send nil :predictor-names m predictor-names)
121 (send nil :response-name m response-name)
122 (send nil :case-labels m case-labels)
123 (if print (send nil :display m))
126 (define-method
127 is-new regression-model-proto ()
128 (send nil self :needs-computing t))
130 (define-method
131 save regression-model-proto ()
132 "Message args: ()
133 Returns an expression that will reconstruct the regression model."
134 `(regression-model ',(send nil :x self)
135 ',(send nil :y self)
136 :intercept ',(send nil :intercept self)
137 :weights ',(send nil :weights self)
138 :included ',(send nil :included self)
139 :predictor-names ',(send nil :predictor-names self)
140 :response-name ',(send nil :response-name self)
141 :case-labels ',(send nil :case-labels self)))
144 ;;; Computing and Display Methods
147 (define-method
148 compute regression-model-proto ()
149 "Message args: (). Recomputes the estimates. For internal use by other messages"
150 (let* ((included (if-else (send nil :included) 1 0))
151 (x (send nil :x self))
152 (y (send nil :y self))
153 (intercept (send nil :intercept self))
154 (weights (send nil :weights self))
155 (w (if weights (* included weights) included))
156 ;; (m (make-sweep-matrix x y w))
157 (n (array-dimension x 1))
158 (p (- (array-dimension m 0) 1))
159 (tss (aref m p p))
160 (tol (* .0001 (mapcar #'standard-deviation (column-list x))))
161 (sweep-result
162 (if intercept
163 (sweep-operator m (iseq 1 n) tol)
164 (sweep-operator m (iseq 0 n) (cons 0.0 tol)))))
165 (setf (field-value 'sweep-matrix self) (first sweep-result))
166 (setf (field-value 'total-sum-of-squares self) tss)
167 (setf (field-value 'residual-sum-of-squares self)
168 (aref (first sweep-result) p p))
169 (setf (field-value 'basis self)
170 (let ((b (remove 0 (second sweep-result))))
171 (if b
172 (- (reverse b) 1)
173 (error "no columns could be swept"))))))
175 (define-method
176 needs-computing regression-model-proto (&optional set)
177 "Toggle switch as needed to figure out if we recompute."
178 (if set (setf (field-value 'sweep-matrix self) nil))
179 (null (field-value 'sweep-matrix self)))
181 (define-method
182 display regression-model-proto ()
183 "Message args: ()
184 Prints the least squares regression summary. Variables not used in the fit
185 are marked as aliased."
186 (let ((coefs (coerce (send nil :coef-estimates self) 'list))
187 (se-s (send nil :coef-standard-errors self))
188 (x (send nil :x self))
189 (p-names (send nil :predictor-names self)))
190 (if (send nil :weights self)
191 (format t "~%Weighted Least Squares Estimates:~2%")
192 (format t "~%Least Squares Estimates:~2%"))
193 (when (send nil :intercept self)
194 (format t "Constant ~10f ~A~%"
195 (car coefs) (list (car se-s)))
196 (setf coefs (cdr coefs))
197 (setf se-s (cdr se-s)))
198 (dotimes (i (array-dimension x 1))
199 (cond
200 ((member i (send nil :basis self))
201 (format t "~22a ~10f ~A~%"
202 (select p-names i) (car coefs) (list (car se-s)))
203 (setf coefs (cdr coefs) se-s (cdr se-s)))
204 (t (format t "~22a aliased~%" (select p-names i)))))
205 (format t "~%")
206 (format t "R Squared: ~10f~%" (send nil :r-squared self))
207 (format t "Sigma hat: ~10f~%" (send nil :sigma-hat self))
208 (format t "Number of cases: ~10d~%" (send nil :num-cases self))
209 (if (/= (send nil :num-cases self) (send nil :num-included self))
210 (format t "Number of cases used: ~10d~%" (send nil :num-included self)))
211 (format t "Degrees of freedom: ~10d~%" (send nil :df self))
212 (format t "~%")))
215 ;;; Slot accessors and mutators
218 (define-method
219 x regression-model-proto (&optional new-x)
220 "Message args: (&optional new-x)
221 With no argument returns the x matrix as supplied to m. With an argument
222 NEW-X sets the x matrix to NEW-X and recomputes the estimates."
223 (when (and new-x (typep new-x 'matrix-like))
224 (setf (field-value 'x self) new-x)
225 (send nil :needs-computing self t))
226 (field-value 'x self))
228 (define-method
229 y regression-model-proto (&optional new-y)
230 "Message args: (&optional new-y)
231 With no argument returns the y sequence as supplied to m. With an argument
232 NEW-Y sets the y sequence to NEW-Y and recomputes the estimates."
233 (when (and new-y (or (typep new-y 'matrix-like)
234 (typep new-y 'sequence)))
235 (setf (field-value 'y self) new-y)
236 (send nil :needs-computing self t))
237 (field-value 'y self))
239 (define-method
240 intercept regression-model-proto (&optional (val nil set))
241 "Message args: (&optional new-intercept)
242 With no argument returns T if the model includes an intercept term, nil if
243 not. With an argument NEW-INTERCEPT the model is changed to include or
244 exclude an intercept, according to the value of NEW-INTERCEPT."
245 (when set
246 (setf (field-value 'intercept self ) val)
247 (send nil :needs-computing self t))
248 (field-value 'intercept self))
250 (define-method
251 weights regression-model-proto (&optional (new-w nil set))
252 "Message args: (&optional new-w)
253 With no argument returns the weight sequence as supplied to m; NIL means
254 an unweighted model. NEW-W sets the weights sequence to NEW-W and
255 recomputes the estimates."
256 (when set
257 (setf (field-value 'weights self) new-w)
258 (send nil :needs-computing self t))
259 (field-value 'weights self))
261 (define-method
262 total-sum-of-squares regression-model-proto ()
263 "Message args: ()
264 Returns the total sum of squares around the mean."
265 (if (send nil self :needs-computing) (send nil :compute self))
266 (field-value 'total-sum-of-squares self))
268 (define-method
269 residual-sum-of-squares regression-model-proto ()
270 "Message args: ()
271 Returns the residual sum of squares for the model."
272 (if (send nil :needs-computing self) (send nil self :compute self))
273 (field-value 'residual-sum-of-squares self))
275 (define-method
276 basis regression-model-proto ()
277 "Message args: ()
278 Returns the indices of the variables used in fitting the model."
279 (if (send nil :needs-computing self) (send nil :compute self))
280 (field-value 'basis self))
282 (define-method
283 sweep-matrix regression-model-proto ()
284 "Message args: ()
285 Returns the swept sweep matrix. For internal use"
286 (if (send nil :needs-computing self) (send nil :compute self))
287 (field-value 'sweep-matrix self))
289 (define-method
290 included regression-model-proto (&optional new-included)
291 "Message args: (&optional new-included)
292 With no argument, NIL means a case is not used in calculating
293 estimates, and non-nil means it is used. NEW-INCLUDED is a sequence
294 of length of y of nil and t to select cases. Estimates are
295 recomputed."
296 (when (and new-included
297 (= (length new-included) (send nil :num-cases self)))
298 (setf (field-value 'included self) (copy-seq new-included))
299 (send nil :needs-computing self t))
300 (if (field-value 'included self)
301 (field-value 'included self)
302 (repeat t (send nil :num-cases self))))
304 (define-method
305 predictor-names regression-model-proto (&optional (names nil set))
306 "Message args: (&optional (names nil set))
307 With no argument returns the predictor names. NAMES sets the names."
308 (if set (setf (field-value 'predictor-names self) (mapcar #'string names)))
309 (let ((p (array-dimension (send nil :x self) 1))
310 (p-names (field-value 'predictor-names self)))
311 (if (not (and p-names (= (length p-names) p)))
312 (setf (field-value 'predictor-names self)
313 (mapcar #'(lambda (a) (format nil "Variable ~a" a))
314 (iseq 0 (- p 1))))))
315 (field-value 'predictor-names self))
317 (define-method
318 response-name regression-model-proto (&optional (name "Y" set))
319 "Message args: (&optional name)
320 With no argument returns the response name. NAME sets the name."
321 (if set (setf (field-value 'response-name self) (if name (string name) "Y")))
322 (field-value 'response-name self))
324 (define-method
325 case-labels regression-model-proto (&optional (labels nil set))
326 "Message args: (&optional labels)
327 With no argument returns the case-labels. LABELS sets the labels."
328 (if set (setf (field-value 'case-labels self)
329 (if labels
330 (mapcar #'string labels)
331 (mapcar #'(lambda (x) (format nil "~d" x))
332 (iseq 0 (- (send nil :num-cases self) 1))))))
333 (field-value 'case-labels self))
336 ;;; Other Methods
337 ;;; None of these methods access any slots directly.
340 (define-method
341 num-cases regression-model-proto ()
342 "Message args: ()
343 Returns the number of cases in the model."
344 (length (send nil :y self)))
346 (define-method
347 num-included regression-model-proto ()
348 "Message args: ()
349 Returns the number of cases used in the computations."
350 (sum (if-else (send nil :included self) 1 0)))
352 (define-method
353 num-coefs regression-model-proto ()
354 "Message args: ()
355 Returns the number of coefficients in the fit model (including the
356 intercept if the model includes one)."
357 (if (send nil :intercept self)
358 (+ 1 (length (send nil :basis self)))
359 (length (send nil :basis self))))
361 (define-method
362 df regression-model-proto ()
363 "Message args: ()
364 Returns the number of degrees of freedom in the model."
365 (- (send nil :num-included self) (send nil :num-coefs self)))
367 (define-method
368 x-matrix regression-model-proto ()
369 "Message args: ()
370 Returns the X matrix for the model, including a column of 1's, if
371 appropriate. Columns of X matrix correspond to entries in basis."
372 (field-value :x self))
374 (define-method
375 leverages regression-model-proto ()
376 "Message args: ()
377 Returns the diagonal elements of the hat matrix."
378 (let* ((weights (send nil :weights self))
379 (x (send nil :x-matrix self))
380 (raw-levs (m* (* (m* x (send nil :xtxinv self)) x)
381 (repeat 1 (send nil :num-coefs self)))))
382 (if weights (* weights raw-levs) raw-levs)))
384 (define-method
385 fit-values regression-model-proto ()
386 "Message args: ()
387 Returns the fitted values for the model."
388 (m* (send nil :x-matrix self) (send nil :coef-estimates self)))
390 (define-method
391 raw-residuals regression-model-proto ()
392 "Message args: ()
393 Returns the raw residuals for a model."
394 (- (send nil :y self) (send nil :fit-values self)))
396 (define-method
397 residuals regression-model-proto ()
398 "Message args: ()
399 Returns the raw residuals for a model without weights. If the model
400 includes weights the raw residuals times the square roots of the
401 weights are returned."
402 (let ((raw-residuals (send nil :raw-residuals self))
403 (weights (send nil :weights self)))
404 (if weights
405 (* (sqrt weights) raw-residuals)
406 raw-residuals)))
408 (define-method
409 sum-of-squares regression-model-proto ()
410 "Message args: ()
411 Returns the error sum of squares for the model."
412 (send nil :residual-sum-of-squares self))
414 (define-method
415 sigma-hat regression-model-proto ()
416 "Message args: ()
417 Returns the estimated standard deviation of the deviations about the
418 regression line."
419 (let ((ss (send nil :sum-of-squares self))
420 (df (send nil :df self)))
421 (if (/= df 0) (sqrt (/ ss df)))))
423 ;; for models without an intercept the 'usual' formula for R^2 can
424 ;; give negative results; hence the max.
425 (define-method
426 r-squared regression-model-proto ()
427 "Message args: ()
428 Returns the sample squared multiple correlation coefficient, R
429 squared, for the regression."
430 (max (- 1 (/ (send nil :sum-of-squares self)
431 (send nil :total-sum-of-squares self)))
434 (define-method
435 coef-estimates regression-model-proto ()
436 "Message args: ()
437 Returns the OLS (ordinary least squares) estimates of the regression
438 coefficients. Entries beyond the intercept correspond to entries in
439 basis."
440 (let ((n (array-dimension (send nil :x self) 1))
441 (indices (if (send nil :intercept self)
442 (cons 0 (+ 1 (send nil :basis self)))
443 (+ 1 (send nil :basis self))))
444 (m (send nil :sweep-matrix self)))
445 (coerce (compound-data-seq (select m (+ 1 n) indices)) 'list)))
447 (define-method
448 xtxinv regression-model-proto ()
449 "Message args: ()
450 Returns ((X^T) X)^(-1) or ((X^T) W X)^(-1)."
451 (let ((indices (if (send nil :intercept self)
452 (cons 0 (1+ (send nil :basis self)))
453 (1+ (send nil :basis self)))))
454 (select (send nil :sweep-matrix self) indices indices)))
456 (define-method
457 coef-standard-errors regression-model-proto ()
458 "Message args: ()
459 Returns estimated standard errors of coefficients. Entries beyond the
460 intercept correspond to entries in basis."
461 (let ((s (send nil :sigma-hat self)))
462 (if s (* (send nil :sigma-hat self)
463 (sqrt (diagonal (send nil :xtxinv self)))))))
465 (define-method
466 studentized-residuals regression-model-proto ()
467 "Message args: ()
469 Computes the internally studentized residuals for included cases and
470 externally studentized residuals for excluded cases."
471 (let ((res (send nil :residuals self))
472 (lev (send nil :leverages self))
473 (sig (send nil :sigma-hat self))
474 (inc (send nil :included self)))
475 (if-else inc
476 (/ res (* sig (sqrt (pmax .00001 (- 1 lev)))))
477 (/ res (* sig (sqrt (+ 1 lev)))))))
479 (define-method
480 externally-studentized-residuals regression-model-proto ()
481 "Message args: ()
482 Computes the externally studentized residuals."
483 (let* ((res (send nil :studentized-residuals self))
484 (df (send nil :df self)))
485 (if-else (send nil :included self)
486 (* res (sqrt (/ (- df 1) (- df (^ res 2)))))
487 res)))
489 (define-method
490 cooks-distances regression-model-proto ()
491 "Message args: ()
492 Computes Cook's distances."
493 (let ((lev (send nil :leverages self))
494 (res (/ (^ (send nil :studentized-residuals self) 2)
495 (send nil :num-coefs self))))
496 (if-else (send nil :included self)
497 (* res (/ lev (- 1 lev)))
498 (* res lev))))
501 (define-method
502 plot-residuals regression-model-proto (&optional x-values)
503 "Message args: (&optional x-values)
505 Opens a window with a plot of the residuals. If X-VALUES are not
506 supplied the fitted values are used. The plot can be linked to other
507 plots with the link-views function. Returns a plot object."
508 (plot-points (if x-values
509 x-values
510 (send nil :fit-values self))
511 (send nil :residuals self)
512 :title "Residual Plot"
513 :point-labels (send nil :case-labels self)))
515 (define-method
516 plot-bayes-residuals regression-model-proto (&optional x-values)
517 "Message args: (&optional x-values)
519 Opens a window with a plot of the standardized residuals and two
520 standard error bars for the posterior distribution of the actual
521 deviations from the line. See Chaloner and Brant. If X-VALUES are not
522 supplied the fitted values are used. The plot can be linked to other
523 plots with the link-views function. Returns a plot object."
524 (let* ((r (/ (send nil :residuals self)
525 (send nil :sigma-hat self)))
526 (d (* 2 (sqrt (send nil :leverages self))))
527 (low (- r d))
528 (high (+ r d))
529 (x-values (if x-values
530 x-values
531 (send nil :fit-values self)))
532 (p (plot-points x-values r
533 :title "Bayes Residual Plot"
534 :point-labels (send nil :case-labels self))))
535 (map 'list
536 #'(lambda (a b c d)
537 (send nil :plotline p a b c d nil))
538 x-values low x-values high)
539 (send nil :adjust-to-data p)
543 ;;; examples
546 (defparameter *lm-ex-obj*
547 (regression-model absorbtion iron))