4 ;;;; regression.lsp XLISP-STAT regression model proto and methods
5 ;;;; XLISP-STAT 2.1 Copyright (c) 1990, by Luke Tierney
6 ;;;; Additions to Xlisp 2.1, Copyright (c) 1989 by David Michael Betz
7 ;;;; You may give out copies of this software; for conditions see the file
8 ;;;; COPYING included with this distribution.
11 ;;;; Incorporates modifications suggested by Sandy Weisberg.
13 (asdf:oos
'asdf
:load-op
'clon
)
15 (defpackage :regression-clon
19 lisp-stat-data-examples
)
20 (:export regression-model regression-model-proto x y intercept
21 sweep-matrix basis weights included total-sum-of-squares
22 residual-sum-of-squares predictor-names response-name
25 (in-package :regression-clon
)
29 (defun flatten-list (lst)
30 "Flattens a list of lists into a single list. Only useful when
31 we've mucked up data. Sign of usage means poor coding!"
32 (cond ((null lst
) ;; endp?
35 (append (flatten-list (car lst
)) (flatten-list (cdr lst
))))
41 Repeats VALS. If TIMES is a number and VALS is a non-null, non-array atom,
42 a list of length TIMES with all elements eq to VALS is returned. If VALS
43 is a list and TIMES is a number then VALS is appended TIMES times. If
44 TIMES is a list of numbers then VALS must be a list of equal length and
45 the simpler version of repeat is mapped down the two lists.
46 Examples: (repeat 2 5) returns (2 2 2 2 2)
47 (repeat '(1 2) 3) returns (1 2 1 2 1 2)
49 (repeat '(4 5 6) '(1 2 3)) returns (4 5 5 6 6 6)
50 (repeat '((4) (5 6)) '(2 3)) returns (4 4 5 6 5 6 5 6)"
52 (flatten-list (append (repeat (list a
) (- b
1)) (list a
)))
55 (cond ((compound-data-p b
)
56 (let* ((reps (coerce (compound-data-seq (map-elements #'repeat a b
))
59 (tail (last (first reps
))))
60 (dolist (next (rest reps
) result
)
62 (setf (rest tail
) next
)
63 (setf tail
(last next
))))))
64 (t (let* ((a (if (compound-data-p a
)
65 (coerce (compound-data-seq a
) 'list
)
69 (let ((next (copy-list a
)))
70 (if result
(setf (rest (last next
)) result
))
71 (setf result next
)))))))
76 ;;;; Regresion Model Prototype
78 (define-prototype regression-model-proto
79 (:documentation
"port of proto to CLON for Normal Linear Regression Model.")
80 x y intercept sweep-matrix basis weights
81 included total-sum-of-squares residual-sum-of-squares
82 predictor-names response-name case-labels
)
84 ;; The doc for this function string is at the limit of XLISP's string
85 ;; constant size - making it longer may cause problems
86 (defun regression-model (x y
&key
90 (included (repeat t
(length y
)))
94 "Args: (x y &key (intercept T) (print T) weights
95 included predictor-names response-name case-labels)
96 X - list of independent variables or X matrix
97 Y - dependent variable.
98 INTERCEPT - T to include (default), NIL for no intercept
99 PRINT - if not NIL print summary information
100 WEIGHTS - if supplied should be the same length as Y; error variances are
101 assumed to be inversely proportional to WEIGHTS
104 CASE-LABELS - sequences of strings or symbols.
105 INCLUDED - if supplied should be the same length as Y, with elements nil
106 to skip a in computing estimates (but not in residual analysis).
107 Returns a regression model object. To examine the model further assign the
108 result to a variable and send it messages.
109 Example (data are in file absorbtion.lsp in the sample data directory/folder):
110 (def m (regression-model (list iron aluminum) absorbtion))
112 (send m :plot-residuals)"
113 (check-type x matrix-like
)
114 (let ((m (clone =regression-model-proto
=)))
115 (setf (field-value x m
) x
)
116 (setf (field-value y m
) y
)
117 (send nil
:intercept m intercept
)
118 (send nil
:weights m weights
)
119 (send nil
:included m included
)
120 (send nil
:predictor-names m predictor-names
)
121 (send nil
:response-name m response-name
)
122 (send nil
:case-labels m case-labels
)
123 (if print
(send nil
:display m
))
127 is-new regression-model-proto
()
128 (send nil self
:needs-computing t
))
131 save regression-model-proto
()
133 Returns an expression that will reconstruct the regression model."
134 `(regression-model ',(send nil
:x self
)
136 :intercept
',(send nil
:intercept self
)
137 :weights
',(send nil
:weights self
)
138 :included
',(send nil
:included self
)
139 :predictor-names
',(send nil
:predictor-names self
)
140 :response-name
',(send nil
:response-name self
)
141 :case-labels
',(send nil
:case-labels self
)))
144 ;;; Computing and Display Methods
148 compute regression-model-proto
()
149 "Message args: (). Recomputes the estimates. For internal use by other messages"
150 (let* ((included (if-else (send nil
:included
) 1 0))
151 (x (send nil
:x self
))
152 (y (send nil
:y self
))
153 (intercept (send nil
:intercept self
))
154 (weights (send nil
:weights self
))
155 (w (if weights
(* included weights
) included
))
156 ;; (m (make-sweep-matrix x y w))
157 (n (array-dimension x
1))
158 (p (- (array-dimension m
0) 1))
160 (tol (* .0001 (mapcar #'standard-deviation
(column-list x
))))
163 (sweep-operator m
(iseq 1 n
) tol
)
164 (sweep-operator m
(iseq 0 n
) (cons 0.0 tol
)))))
165 (setf (field-value 'sweep-matrix self
) (first sweep-result
))
166 (setf (field-value 'total-sum-of-squares self
) tss
)
167 (setf (field-value 'residual-sum-of-squares self
)
168 (aref (first sweep-result
) p p
))
169 (setf (field-value 'basis self
)
170 (let ((b (remove 0 (second sweep-result
))))
173 (error "no columns could be swept"))))))
176 needs-computing regression-model-proto
(&optional set
)
177 "Toggle switch as needed to figure out if we recompute."
178 (if set
(setf (field-value 'sweep-matrix self
) nil
))
179 (null (field-value 'sweep-matrix self
)))
182 display regression-model-proto
()
184 Prints the least squares regression summary. Variables not used in the fit
185 are marked as aliased."
186 (let ((coefs (coerce (send nil
:coef-estimates self
) 'list
))
187 (se-s (send nil
:coef-standard-errors self
))
188 (x (send nil
:x self
))
189 (p-names (send nil
:predictor-names self
)))
190 (if (send nil
:weights self
)
191 (format t
"~%Weighted Least Squares Estimates:~2%")
192 (format t
"~%Least Squares Estimates:~2%"))
193 (when (send nil
:intercept self
)
194 (format t
"Constant ~10f ~A~%"
195 (car coefs
) (list (car se-s
)))
196 (setf coefs
(cdr coefs
))
197 (setf se-s
(cdr se-s
)))
198 (dotimes (i (array-dimension x
1))
200 ((member i
(send nil
:basis self
))
201 (format t
"~22a ~10f ~A~%"
202 (select p-names i
) (car coefs
) (list (car se-s
)))
203 (setf coefs
(cdr coefs
) se-s
(cdr se-s
)))
204 (t (format t
"~22a aliased~%" (select p-names i
)))))
206 (format t
"R Squared: ~10f~%" (send nil
:r-squared self
))
207 (format t
"Sigma hat: ~10f~%" (send nil
:sigma-hat self
))
208 (format t
"Number of cases: ~10d~%" (send nil
:num-cases self
))
209 (if (/= (send nil
:num-cases self
) (send nil
:num-included self
))
210 (format t
"Number of cases used: ~10d~%" (send nil
:num-included self
)))
211 (format t
"Degrees of freedom: ~10d~%" (send nil
:df self
))
215 ;;; Slot accessors and mutators
219 x regression-model-proto
(&optional new-x
)
220 "Message args: (&optional new-x)
221 With no argument returns the x matrix as supplied to m. With an argument
222 NEW-X sets the x matrix to NEW-X and recomputes the estimates."
223 (when (and new-x
(typep new-x
'matrix-like
))
224 (setf (field-value 'x self
) new-x
)
225 (send nil
:needs-computing self t
))
226 (field-value 'x self
))
229 y regression-model-proto
(&optional new-y
)
230 "Message args: (&optional new-y)
231 With no argument returns the y sequence as supplied to m. With an argument
232 NEW-Y sets the y sequence to NEW-Y and recomputes the estimates."
233 (when (and new-y
(or (typep new-y
'matrix-like
)
234 (typep new-y
'sequence
)))
235 (setf (field-value 'y self
) new-y
)
236 (send nil
:needs-computing self t
))
237 (field-value 'y self
))
240 intercept regression-model-proto
(&optional
(val nil set
))
241 "Message args: (&optional new-intercept)
242 With no argument returns T if the model includes an intercept term, nil if
243 not. With an argument NEW-INTERCEPT the model is changed to include or
244 exclude an intercept, according to the value of NEW-INTERCEPT."
246 (setf (field-value 'intercept self
) val
)
247 (send nil
:needs-computing self t
))
248 (field-value 'intercept self
))
251 weights regression-model-proto
(&optional
(new-w nil set
))
252 "Message args: (&optional new-w)
253 With no argument returns the weight sequence as supplied to m; NIL means
254 an unweighted model. NEW-W sets the weights sequence to NEW-W and
255 recomputes the estimates."
257 (setf (field-value 'weights self
) new-w
)
258 (send nil
:needs-computing self t
))
259 (field-value 'weights self
))
262 total-sum-of-squares regression-model-proto
()
264 Returns the total sum of squares around the mean."
265 (if (send nil self
:needs-computing
) (send nil
:compute self
))
266 (field-value 'total-sum-of-squares self
))
269 residual-sum-of-squares regression-model-proto
()
271 Returns the residual sum of squares for the model."
272 (if (send nil
:needs-computing self
) (send nil self
:compute self
))
273 (field-value 'residual-sum-of-squares self
))
276 basis regression-model-proto
()
278 Returns the indices of the variables used in fitting the model."
279 (if (send nil
:needs-computing self
) (send nil
:compute self
))
280 (field-value 'basis self
))
283 sweep-matrix regression-model-proto
()
285 Returns the swept sweep matrix. For internal use"
286 (if (send nil
:needs-computing self
) (send nil
:compute self
))
287 (field-value 'sweep-matrix self
))
290 included regression-model-proto
(&optional new-included
)
291 "Message args: (&optional new-included)
292 With no argument, NIL means a case is not used in calculating
293 estimates, and non-nil means it is used. NEW-INCLUDED is a sequence
294 of length of y of nil and t to select cases. Estimates are
296 (when (and new-included
297 (= (length new-included
) (send nil
:num-cases self
)))
298 (setf (field-value 'included self
) (copy-seq new-included
))
299 (send nil
:needs-computing self t
))
300 (if (field-value 'included self
)
301 (field-value 'included self
)
302 (repeat t
(send nil
:num-cases self
))))
305 predictor-names regression-model-proto
(&optional
(names nil set
))
306 "Message args: (&optional (names nil set))
307 With no argument returns the predictor names. NAMES sets the names."
308 (if set
(setf (field-value 'predictor-names self
) (mapcar #'string names
)))
309 (let ((p (array-dimension (send nil
:x self
) 1))
310 (p-names (field-value 'predictor-names self
)))
311 (if (not (and p-names
(= (length p-names
) p
)))
312 (setf (field-value 'predictor-names self
)
313 (mapcar #'(lambda (a) (format nil
"Variable ~a" a
))
315 (field-value 'predictor-names self
))
318 response-name regression-model-proto
(&optional
(name "Y" set
))
319 "Message args: (&optional name)
320 With no argument returns the response name. NAME sets the name."
321 (if set
(setf (field-value 'response-name self
) (if name
(string name
) "Y")))
322 (field-value 'response-name self
))
325 case-labels regression-model-proto
(&optional
(labels nil set
))
326 "Message args: (&optional labels)
327 With no argument returns the case-labels. LABELS sets the labels."
328 (if set
(setf (field-value 'case-labels self
)
330 (mapcar #'string labels
)
331 (mapcar #'(lambda (x) (format nil
"~d" x
))
332 (iseq 0 (- (send nil
:num-cases self
) 1))))))
333 (field-value 'case-labels self
))
337 ;;; None of these methods access any slots directly.
341 num-cases regression-model-proto
()
343 Returns the number of cases in the model."
344 (length (send nil
:y self
)))
347 num-included regression-model-proto
()
349 Returns the number of cases used in the computations."
350 (sum (if-else (send nil
:included self
) 1 0)))
353 num-coefs regression-model-proto
()
355 Returns the number of coefficients in the fit model (including the
356 intercept if the model includes one)."
357 (if (send nil
:intercept self
)
358 (+ 1 (length (send nil
:basis self
)))
359 (length (send nil
:basis self
))))
362 df regression-model-proto
()
364 Returns the number of degrees of freedom in the model."
365 (- (send nil
:num-included self
) (send nil
:num-coefs self
)))
368 x-matrix regression-model-proto
()
370 Returns the X matrix for the model, including a column of 1's, if
371 appropriate. Columns of X matrix correspond to entries in basis."
372 (field-value :x self
))
375 leverages regression-model-proto
()
377 Returns the diagonal elements of the hat matrix."
378 (let* ((weights (send nil
:weights self
))
379 (x (send nil
:x-matrix self
))
380 (raw-levs (m* (* (m* x
(send nil
:xtxinv self
)) x
)
381 (repeat 1 (send nil
:num-coefs self
)))))
382 (if weights
(* weights raw-levs
) raw-levs
)))
385 fit-values regression-model-proto
()
387 Returns the fitted values for the model."
388 (m* (send nil
:x-matrix self
) (send nil
:coef-estimates self
)))
391 raw-residuals regression-model-proto
()
393 Returns the raw residuals for a model."
394 (- (send nil
:y self
) (send nil
:fit-values self
)))
397 residuals regression-model-proto
()
399 Returns the raw residuals for a model without weights. If the model
400 includes weights the raw residuals times the square roots of the
401 weights are returned."
402 (let ((raw-residuals (send nil
:raw-residuals self
))
403 (weights (send nil
:weights self
)))
405 (* (sqrt weights
) raw-residuals
)
409 sum-of-squares regression-model-proto
()
411 Returns the error sum of squares for the model."
412 (send nil
:residual-sum-of-squares self
))
415 sigma-hat regression-model-proto
()
417 Returns the estimated standard deviation of the deviations about the
419 (let ((ss (send nil
:sum-of-squares self
))
420 (df (send nil
:df self
)))
421 (if (/= df
0) (sqrt (/ ss df
)))))
423 ;; for models without an intercept the 'usual' formula for R^2 can
424 ;; give negative results; hence the max.
426 r-squared regression-model-proto
()
428 Returns the sample squared multiple correlation coefficient, R
429 squared, for the regression."
430 (max (- 1 (/ (send nil
:sum-of-squares self
)
431 (send nil
:total-sum-of-squares self
)))
435 coef-estimates regression-model-proto
()
437 Returns the OLS (ordinary least squares) estimates of the regression
438 coefficients. Entries beyond the intercept correspond to entries in
440 (let ((n (array-dimension (send nil
:x self
) 1))
441 (indices (if (send nil
:intercept self
)
442 (cons 0 (+ 1 (send nil
:basis self
)))
443 (+ 1 (send nil
:basis self
))))
444 (m (send nil
:sweep-matrix self
)))
445 (coerce (compound-data-seq (select m
(+ 1 n
) indices
)) 'list
)))
448 xtxinv regression-model-proto
()
450 Returns ((X^T) X)^(-1) or ((X^T) W X)^(-1)."
451 (let ((indices (if (send nil
:intercept self
)
452 (cons 0 (1+ (send nil
:basis self
)))
453 (1+ (send nil
:basis self
)))))
454 (select (send nil
:sweep-matrix self
) indices indices
)))
457 coef-standard-errors regression-model-proto
()
459 Returns estimated standard errors of coefficients. Entries beyond the
460 intercept correspond to entries in basis."
461 (let ((s (send nil
:sigma-hat self
)))
462 (if s
(* (send nil
:sigma-hat self
)
463 (sqrt (diagonal (send nil
:xtxinv self
)))))))
466 studentized-residuals regression-model-proto
()
469 Computes the internally studentized residuals for included cases and
470 externally studentized residuals for excluded cases."
471 (let ((res (send nil
:residuals self
))
472 (lev (send nil
:leverages self
))
473 (sig (send nil
:sigma-hat self
))
474 (inc (send nil
:included self
)))
476 (/ res
(* sig
(sqrt (pmax .00001 (- 1 lev
)))))
477 (/ res
(* sig
(sqrt (+ 1 lev
)))))))
480 externally-studentized-residuals regression-model-proto
()
482 Computes the externally studentized residuals."
483 (let* ((res (send nil
:studentized-residuals self
))
484 (df (send nil
:df self
)))
485 (if-else (send nil
:included self
)
486 (* res
(sqrt (/ (- df
1) (- df
(^ res
2)))))
490 cooks-distances regression-model-proto
()
492 Computes Cook's distances."
493 (let ((lev (send nil
:leverages self
))
494 (res (/ (^
(send nil
:studentized-residuals self
) 2)
495 (send nil
:num-coefs self
))))
496 (if-else (send nil
:included self
)
497 (* res
(/ lev
(- 1 lev
)))
502 plot-residuals regression-model-proto
(&optional x-values
)
503 "Message args: (&optional x-values)
505 Opens a window with a plot of the residuals. If X-VALUES are not
506 supplied the fitted values are used. The plot can be linked to other
507 plots with the link-views function. Returns a plot object."
508 (plot-points (if x-values
510 (send nil
:fit-values self
))
511 (send nil
:residuals self
)
512 :title
"Residual Plot"
513 :point-labels
(send nil
:case-labels self
)))
516 plot-bayes-residuals regression-model-proto
(&optional x-values
)
517 "Message args: (&optional x-values)
519 Opens a window with a plot of the standardized residuals and two
520 standard error bars for the posterior distribution of the actual
521 deviations from the line. See Chaloner and Brant. If X-VALUES are not
522 supplied the fitted values are used. The plot can be linked to other
523 plots with the link-views function. Returns a plot object."
524 (let* ((r (/ (send nil
:residuals self
)
525 (send nil
:sigma-hat self
)))
526 (d (* 2 (sqrt (send nil
:leverages self
))))
529 (x-values (if x-values
531 (send nil
:fit-values self
)))
532 (p (plot-points x-values r
533 :title
"Bayes Residual Plot"
534 :point-labels
(send nil
:case-labels self
))))
537 (send nil
:plotline p a b c d nil
))
538 x-values low x-values high
)
539 (send nil
:adjust-to-data p
)
546 (defparameter *lm-ex-obj
*
547 (regression-model absorbtion iron
))