4 ;;;; regression.lsp XLISP-STAT regression model proto and methods
5 ;;;; XLISP-STAT 2.1 Copyright (c) 1990, by Luke Tierney
6 ;;;; Additions to Xlisp 2.1, Copyright (c) 1989 by David Michael Betz
7 ;;;; You may give out copies of this software; for conditions see the file
8 ;;;; COPYING included with this distribution.
11 ;;;; Incorporates modifications suggested by Sandy Weisberg.
13 ;;; Sheeple is another prototype-based system.
14 (asdf:oos
'asdf
:load-op
'sheeple
)
16 (defpackage :regression-sheeple
20 lisp-stat-data-examples
)
21 (:export regression-model regression-model-proto x y intercept
22 sweep-matrix basis weights included total-sum-of-squares
23 residual-sum-of-squares predictor-names response-name
26 (in-package :regression-clon
)
30 (defun flatten-list (lst)
31 "Flattens a list of lists into a single list. Only useful when
32 we've mucked up data. Sign of usage means poor coding!"
33 (cond ((null lst
) ;; endp?
36 (append (flatten-list (car lst
)) (flatten-list (cdr lst
))))
42 Repeats VALS. If TIMES is a number and VALS is a non-null, non-array atom,
43 a list of length TIMES with all elements eq to VALS is returned. If VALS
44 is a list and TIMES is a number then VALS is appended TIMES times. If
45 TIMES is a list of numbers then VALS must be a list of equal length and
46 the simpler version of repeat is mapped down the two lists.
47 Examples: (repeat 2 5) returns (2 2 2 2 2)
48 (repeat '(1 2) 3) returns (1 2 1 2 1 2)
50 (repeat '(4 5 6) '(1 2 3)) returns (4 5 5 6 6 6)
51 (repeat '((4) (5 6)) '(2 3)) returns (4 4 5 6 5 6 5 6)"
53 (flatten-list (append (repeat (list a
) (- b
1)) (list a
)))
56 (cond ((compound-data-p b
)
57 (let* ((reps (coerce (compound-data-seq (map-elements #'repeat a b
))
60 (tail (last (first reps
))))
61 (dolist (next (rest reps
) result
)
63 (setf (rest tail
) next
)
64 (setf tail
(last next
))))))
65 (t (let* ((a (if (compound-data-p a
)
66 (coerce (compound-data-seq a
) 'list
)
70 (let ((next (copy-list a
)))
71 (if result
(setf (rest (last next
)) result
))
72 (setf result next
)))))))
77 ;;;; Regresion Model Prototype
79 (defsheep =regression-model-proto
= ()
80 (( x y intercept sweep-matrix basis weights
81 included total-sum-of-squares residual-sum-of-squares
82 predictor-names response-name case-labels
)))
83 ;; (:documentation "port of proto to SHEEPLE for Normal Linear Regression Model.")
85 ;; The doc for this function string is at the limit of XLISP's string
86 ;; constant size - making it longer may cause problems
87 (defun regression-model (x y
&key
91 (included (repeat t
(length y
)))
95 "Args: (x y &key (intercept T) (print T) weights
96 included predictor-names response-name case-labels)
97 X - list of independent variables or X matrix
98 Y - dependent variable.
99 INTERCEPT - T to include (default), NIL for no intercept
100 PRINT - if not NIL print summary information
101 WEIGHTS - if supplied should be the same length as Y; error variances are
102 assumed to be inversely proportional to WEIGHTS
105 CASE-LABELS - sequences of strings or symbols.
106 INCLUDED - if supplied should be the same length as Y, with elements nil
107 to skip a in computing estimates (but not in residual analysis).
108 Returns a regression model object. To examine the model further assign the
109 result to a variable and send it messages.
110 Example (data are in file absorbtion.lsp in the sample data directory/folder):
111 (def m (regression-model (list iron aluminum) absorbtion))
113 (send m :plot-residuals)"
114 (check-type x matrix-like
)
115 (let ((m (clone =regression-model-proto
=)))
116 (setf (field-value x m
) x
)
117 (setf (field-value y m
) y
)
118 (send nil
:intercept m intercept
)
119 (send nil
:weights m weights
)
120 (send nil
:included m included
)
121 (send nil
:predictor-names m predictor-names
)
122 (send nil
:response-name m response-name
)
123 (send nil
:case-labels m case-labels
)
124 (if print
(send nil
:display m
))
128 is-new regression-model-proto
()
129 (send nil self
:needs-computing t
))
132 save regression-model-proto
()
134 Returns an expression that will reconstruct the regression model."
135 `(regression-model ',(send nil
:x self
)
137 :intercept
',(send nil
:intercept self
)
138 :weights
',(send nil
:weights self
)
139 :included
',(send nil
:included self
)
140 :predictor-names
',(send nil
:predictor-names self
)
141 :response-name
',(send nil
:response-name self
)
142 :case-labels
',(send nil
:case-labels self
)))
145 ;;; Computing and Display Methods
149 compute regression-model-proto
()
150 "Message args: (). Recomputes the estimates. For internal use by other messages"
151 (let* ((included (if-else (send nil
:included
) 1 0))
152 (x (send nil
:x self
))
153 (y (send nil
:y self
))
154 (intercept (send nil
:intercept self
))
155 (weights (send nil
:weights self
))
156 (w (if weights
(* included weights
) included
))
157 ;; (m (make-sweep-matrix x y w))
158 (n (array-dimension x
1))
159 (p (- (array-dimension m
0) 1))
161 (tol (* .0001 (mapcar #'standard-deviation
(column-list x
))))
164 (sweep-operator m
(iseq 1 n
) tol
)
165 (sweep-operator m
(iseq 0 n
) (cons 0.0 tol
)))))
166 (setf (field-value 'sweep-matrix self
) (first sweep-result
))
167 (setf (field-value 'total-sum-of-squares self
) tss
)
168 (setf (field-value 'residual-sum-of-squares self
)
169 (aref (first sweep-result
) p p
))
170 (setf (field-value 'basis self
)
171 (let ((b (remove 0 (second sweep-result
))))
174 (error "no columns could be swept"))))))
177 needs-computing regression-model-proto
(&optional set
)
178 "Toggle switch as needed to figure out if we recompute."
179 (if set
(setf (field-value 'sweep-matrix self
) nil
))
180 (null (field-value 'sweep-matrix self
)))
183 display regression-model-proto
()
185 Prints the least squares regression summary. Variables not used in the fit
186 are marked as aliased."
187 (let ((coefs (coerce (send nil
:coef-estimates self
) 'list
))
188 (se-s (send nil
:coef-standard-errors self
))
189 (x (send nil
:x self
))
190 (p-names (send nil
:predictor-names self
)))
191 (if (send nil
:weights self
)
192 (format t
"~%Weighted Least Squares Estimates:~2%")
193 (format t
"~%Least Squares Estimates:~2%"))
194 (when (send nil
:intercept self
)
195 (format t
"Constant ~10f ~A~%"
196 (car coefs
) (list (car se-s
)))
197 (setf coefs
(cdr coefs
))
198 (setf se-s
(cdr se-s
)))
199 (dotimes (i (array-dimension x
1))
201 ((member i
(send nil
:basis self
))
202 (format t
"~22a ~10f ~A~%"
203 (select p-names i
) (car coefs
) (list (car se-s
)))
204 (setf coefs
(cdr coefs
) se-s
(cdr se-s
)))
205 (t (format t
"~22a aliased~%" (select p-names i
)))))
207 (format t
"R Squared: ~10f~%" (send nil
:r-squared self
))
208 (format t
"Sigma hat: ~10f~%" (send nil
:sigma-hat self
))
209 (format t
"Number of cases: ~10d~%" (send nil
:num-cases self
))
210 (if (/= (send nil
:num-cases self
) (send nil
:num-included self
))
211 (format t
"Number of cases used: ~10d~%" (send nil
:num-included self
)))
212 (format t
"Degrees of freedom: ~10d~%" (send nil
:df self
))
216 ;;; Slot accessors and mutators
220 x regression-model-proto
(&optional new-x
)
221 "Message args: (&optional new-x)
222 With no argument returns the x matrix as supplied to m. With an argument
223 NEW-X sets the x matrix to NEW-X and recomputes the estimates."
224 (when (and new-x
(typep new-x
'matrix-like
))
225 (setf (field-value 'x self
) new-x
)
226 (send nil
:needs-computing self t
))
227 (field-value 'x self
))
230 y regression-model-proto
(&optional new-y
)
231 "Message args: (&optional new-y)
232 With no argument returns the y sequence as supplied to m. With an argument
233 NEW-Y sets the y sequence to NEW-Y and recomputes the estimates."
234 (when (and new-y
(or (typep new-y
'matrix-like
)
235 (typep new-y
'sequence
)))
236 (setf (field-value 'y self
) new-y
)
237 (send nil
:needs-computing self t
))
238 (field-value 'y self
))
241 intercept regression-model-proto
(&optional
(val nil set
))
242 "Message args: (&optional new-intercept)
243 With no argument returns T if the model includes an intercept term, nil if
244 not. With an argument NEW-INTERCEPT the model is changed to include or
245 exclude an intercept, according to the value of NEW-INTERCEPT."
247 (setf (field-value 'intercept self
) val
)
248 (send nil
:needs-computing self t
))
249 (field-value 'intercept self
))
252 weights regression-model-proto
(&optional
(new-w nil set
))
253 "Message args: (&optional new-w)
254 With no argument returns the weight sequence as supplied to m; NIL means
255 an unweighted model. NEW-W sets the weights sequence to NEW-W and
256 recomputes the estimates."
258 (setf (field-value 'weights self
) new-w
)
259 (send nil
:needs-computing self t
))
260 (field-value 'weights self
))
263 total-sum-of-squares regression-model-proto
()
265 Returns the total sum of squares around the mean."
266 (if (send nil self
:needs-computing
) (send nil
:compute self
))
267 (field-value 'total-sum-of-squares self
))
270 residual-sum-of-squares regression-model-proto
()
272 Returns the residual sum of squares for the model."
273 (if (send nil
:needs-computing self
) (send nil self
:compute self
))
274 (field-value 'residual-sum-of-squares self
))
277 basis regression-model-proto
()
279 Returns the indices of the variables used in fitting the model."
280 (if (send nil
:needs-computing self
) (send nil
:compute self
))
281 (field-value 'basis self
))
284 sweep-matrix regression-model-proto
()
286 Returns the swept sweep matrix. For internal use"
287 (if (send nil
:needs-computing self
) (send nil
:compute self
))
288 (field-value 'sweep-matrix self
))
291 included regression-model-proto
(&optional new-included
)
292 "Message args: (&optional new-included)
293 With no argument, NIL means a case is not used in calculating
294 estimates, and non-nil means it is used. NEW-INCLUDED is a sequence
295 of length of y of nil and t to select cases. Estimates are
297 (when (and new-included
298 (= (length new-included
) (send nil
:num-cases self
)))
299 (setf (field-value 'included self
) (copy-seq new-included
))
300 (send nil
:needs-computing self t
))
301 (if (field-value 'included self
)
302 (field-value 'included self
)
303 (repeat t
(send nil
:num-cases self
))))
306 predictor-names regression-model-proto
(&optional
(names nil set
))
307 "Message args: (&optional (names nil set))
308 With no argument returns the predictor names. NAMES sets the names."
309 (if set
(setf (field-value 'predictor-names self
) (mapcar #'string names
)))
310 (let ((p (array-dimension (send nil
:x self
) 1))
311 (p-names (field-value 'predictor-names self
)))
312 (if (not (and p-names
(= (length p-names
) p
)))
313 (setf (field-value 'predictor-names self
)
314 (mapcar #'(lambda (a) (format nil
"Variable ~a" a
))
316 (field-value 'predictor-names self
))
319 response-name regression-model-proto
(&optional
(name "Y" set
))
320 "Message args: (&optional name)
321 With no argument returns the response name. NAME sets the name."
322 (if set
(setf (field-value 'response-name self
) (if name
(string name
) "Y")))
323 (field-value 'response-name self
))
326 case-labels regression-model-proto
(&optional
(labels nil set
))
327 "Message args: (&optional labels)
328 With no argument returns the case-labels. LABELS sets the labels."
329 (if set
(setf (field-value 'case-labels self
)
331 (mapcar #'string labels
)
332 (mapcar #'(lambda (x) (format nil
"~d" x
))
333 (iseq 0 (- (send nil
:num-cases self
) 1))))))
334 (field-value 'case-labels self
))
338 ;;; None of these methods access any slots directly.
342 num-cases regression-model-proto
()
344 Returns the number of cases in the model."
345 (length (send nil
:y self
)))
348 num-included regression-model-proto
()
350 Returns the number of cases used in the computations."
351 (sum (if-else (send nil
:included self
) 1 0)))
354 num-coefs regression-model-proto
()
356 Returns the number of coefficients in the fit model (including the
357 intercept if the model includes one)."
358 (if (send nil
:intercept self
)
359 (+ 1 (length (send nil
:basis self
)))
360 (length (send nil
:basis self
))))
363 df regression-model-proto
()
365 Returns the number of degrees of freedom in the model."
366 (- (send nil
:num-included self
) (send nil
:num-coefs self
)))
369 x-matrix regression-model-proto
()
371 Returns the X matrix for the model, including a column of 1's, if
372 appropriate. Columns of X matrix correspond to entries in basis."
373 (field-value :x self
))
376 leverages regression-model-proto
()
378 Returns the diagonal elements of the hat matrix."
379 (let* ((weights (send nil
:weights self
))
380 (x (send nil
:x-matrix self
))
381 (raw-levs (m* (* (m* x
(send nil
:xtxinv self
)) x
)
382 (repeat 1 (send nil
:num-coefs self
)))))
383 (if weights
(* weights raw-levs
) raw-levs
)))
386 fit-values regression-model-proto
()
388 Returns the fitted values for the model."
389 (m* (send nil
:x-matrix self
) (send nil
:coef-estimates self
)))
392 raw-residuals regression-model-proto
()
394 Returns the raw residuals for a model."
395 (- (send nil
:y self
) (send nil
:fit-values self
)))
398 residuals regression-model-proto
()
400 Returns the raw residuals for a model without weights. If the model
401 includes weights the raw residuals times the square roots of the
402 weights are returned."
403 (let ((raw-residuals (send nil
:raw-residuals self
))
404 (weights (send nil
:weights self
)))
406 (* (sqrt weights
) raw-residuals
)
410 sum-of-squares regression-model-proto
()
412 Returns the error sum of squares for the model."
413 (send nil
:residual-sum-of-squares self
))
416 sigma-hat regression-model-proto
()
418 Returns the estimated standard deviation of the deviations about the
420 (let ((ss (send nil
:sum-of-squares self
))
421 (df (send nil
:df self
)))
422 (if (/= df
0) (sqrt (/ ss df
)))))
424 ;; for models without an intercept the 'usual' formula for R^2 can
425 ;; give negative results; hence the max.
427 r-squared regression-model-proto
()
429 Returns the sample squared multiple correlation coefficient, R
430 squared, for the regression."
431 (max (- 1 (/ (send nil
:sum-of-squares self
)
432 (send nil
:total-sum-of-squares self
)))
436 coef-estimates regression-model-proto
()
438 Returns the OLS (ordinary least squares) estimates of the regression
439 coefficients. Entries beyond the intercept correspond to entries in
441 (let ((n (array-dimension (send nil
:x self
) 1))
442 (indices (if (send nil
:intercept self
)
443 (cons 0 (+ 1 (send nil
:basis self
)))
444 (+ 1 (send nil
:basis self
))))
445 (m (send nil
:sweep-matrix self
)))
446 (coerce (compound-data-seq (select m
(+ 1 n
) indices
)) 'list
)))
449 xtxinv regression-model-proto
()
451 Returns ((X^T) X)^(-1) or ((X^T) W X)^(-1)."
452 (let ((indices (if (send nil
:intercept self
)
453 (cons 0 (1+ (send nil
:basis self
)))
454 (1+ (send nil
:basis self
)))))
455 (select (send nil
:sweep-matrix self
) indices indices
)))
458 coef-standard-errors regression-model-proto
()
460 Returns estimated standard errors of coefficients. Entries beyond the
461 intercept correspond to entries in basis."
462 (let ((s (send nil
:sigma-hat self
)))
463 (if s
(* (send nil
:sigma-hat self
)
464 (sqrt (diagonal (send nil
:xtxinv self
)))))))
467 studentized-residuals regression-model-proto
()
470 Computes the internally studentized residuals for included cases and
471 externally studentized residuals for excluded cases."
472 (let ((res (send nil
:residuals self
))
473 (lev (send nil
:leverages self
))
474 (sig (send nil
:sigma-hat self
))
475 (inc (send nil
:included self
)))
477 (/ res
(* sig
(sqrt (pmax .00001 (- 1 lev
)))))
478 (/ res
(* sig
(sqrt (+ 1 lev
)))))))
481 externally-studentized-residuals regression-model-proto
()
483 Computes the externally studentized residuals."
484 (let* ((res (send nil
:studentized-residuals self
))
485 (df (send nil
:df self
)))
486 (if-else (send nil
:included self
)
487 (* res
(sqrt (/ (- df
1) (- df
(^ res
2)))))
491 cooks-distances regression-model-proto
()
493 Computes Cook's distances."
494 (let ((lev (send nil
:leverages self
))
495 (res (/ (^
(send nil
:studentized-residuals self
) 2)
496 (send nil
:num-coefs self
))))
497 (if-else (send nil
:included self
)
498 (* res
(/ lev
(- 1 lev
)))
503 plot-residuals regression-model-proto
(&optional x-values
)
504 "Message args: (&optional x-values)
506 Opens a window with a plot of the residuals. If X-VALUES are not
507 supplied the fitted values are used. The plot can be linked to other
508 plots with the link-views function. Returns a plot object."
509 (plot-points (if x-values
511 (send nil
:fit-values self
))
512 (send nil
:residuals self
)
513 :title
"Residual Plot"
514 :point-labels
(send nil
:case-labels self
)))
517 plot-bayes-residuals regression-model-proto
(&optional x-values
)
518 "Message args: (&optional x-values)
520 Opens a window with a plot of the standardized residuals and two
521 standard error bars for the posterior distribution of the actual
522 deviations from the line. See Chaloner and Brant. If X-VALUES are not
523 supplied the fitted values are used. The plot can be linked to other
524 plots with the link-views function. Returns a plot object."
525 (let* ((r (/ (send nil
:residuals self
)
526 (send nil
:sigma-hat self
)))
527 (d (* 2 (sqrt (send nil
:leverages self
))))
530 (x-values (if x-values
532 (send nil
:fit-values self
)))
533 (p (plot-points x-values r
534 :title
"Bayes Residual Plot"
535 :point-labels
(send nil
:case-labels self
))))
538 (send nil
:plotline p a b c d nil
))
539 x-values low x-values high
)
540 (send nil
:adjust-to-data p
)
547 (defparameter *lm-ex-obj
*
548 (regression-model absorbtion iron
))