3 ;;; Copyright (c) 2005--2007, by A.J. Rossini <blindglobe@gmail.com>
4 ;;; See COPYRIGHT file for any additional restrictions (BSD license).
5 ;;; Since 1991, ANSI was finally finished. Modified to match ANSI
9 ;;; regression.lsp XLISP-STAT regression model proto and methods
10 ;;; XLISP-STAT 2.1 Copyright (c) 1990, by Luke Tierney
11 ;;; Additions to Xlisp 2.1, Copyright (c) 1989 by David Michael Betz
12 ;;; You may give out copies of this software; for conditions see the file
13 ;;; COPYING included with this distribution.
16 ;;; Incorporates modifications suggested by Sandy Weisberg.
19 ;;; This version uses matlisp rather than homebrewed linear algebra.
23 (defpackage :lisp-stat-regression-linear
25 :lisp-stat-object-system
28 :lisp-stat-compound-data
30 :lisp-stat-descriptive-statistics
)
31 (:shadowing-import-from
:lisp-stat-object-system
32 slot-value call-method call-next-method
)
33 (:shadowing-import-from
:lisp-stat-math
34 expt
+ -
* / ** mod rem abs
1+ 1- log exp sqrt sin cos tan
35 asin acos atan sinh cosh tanh asinh acosh atanh float random
36 truncate floor ceiling round minusp zerop plusp evenp oddp
37 < <= = /= >= > ;; complex
38 conjugate realpart imagpart phase
39 min max logand logior logxor lognot ffloor fceiling
40 ftruncate fround signum cis
)
41 (:export regression-model regression-model-proto x y intercept
43 basis weights included total-sum-of-squares
44 residual-sum-of-squares
45 predictor-names response-name case-labels
))
47 (in-package :lisp-stat-regression-linear
)
49 ;;; Regresion Model Prototype
52 ;; The general strategy behind the fitting of models using prototypes
53 ;; is that we need to think about want the actual fits are, and then
54 ;; the fits can be used to recompute as components are changes. One
55 ;; catch here is that we'd like some notion of trace-ability, in
56 ;; particular, there is not necessarily a fixed way to take care of the
57 ;; audit trail. save-nd-die might be a means of recording the final
58 ;; approach, but we are challenged by the problem of using advice and
59 ;; other such features to capture stages and steps that are considered
60 ;; along the goals of estimating a model.
62 (defvar regression-model-proto nil
63 "Prototype for all regression model instances.")
64 (defproto regression-model-proto
65 '(x y intercept sweep-matrix basis weights
68 residual-sum-of-squares
75 "Normal Linear Regression Model")
77 (defun regression-model (x y
&key
81 (included (repeat t
(length y
)))
85 (doc "Undocumented Regression Model Instance")
87 "Args: (x y &key (intercept T) (print T) (weights nil)
88 included predictor-names response-name case-labels)
89 X - list of independent variables or X matrix
90 Y - dependent variable.
91 INTERCEPT - T to include (default), NIL for no intercept
92 PRINT - if not NIL print summary information
93 WEIGHTS - if supplied should be the same length as Y; error
95 assumed to be inversely proportional to WEIGHTS
96 PREDICTOR-NAMES, RESPONSE-NAME, CASE-LABELS
97 - sequences of strings or symbols.
98 INCLUDED - if supplied should be the same length as Y, with
99 elements nil to skip a in computing estimates (but not
100 in residual analysis).
101 Returns a regression model object. To examine the model further assign the
102 result to a variable and send it messages.
103 Example (data are in file absorbtion.lsp in the sample data directory):
104 (def m (regression-model (list iron aluminum) absorbtion))
105 (send m :help) (send m :plot-residuals)"
107 ((typep x real-matrix
) x
) ;; matrix, or ...
108 ((typep x
'vector
) (list x
)) ;; vector, or ...
109 ((and (consp x
) ;; what?
110 (numberp (car x
))) (list x
))
111 (t x
))) ;; defaulting...
112 (m (send regression-model-proto
:new
)))
115 (send m
:x
(if (typep x real-matrix
) x
(apply #'bind-columns x
)))
117 (send m
:intercept intercept
)
118 (send m
:weights weights
)
119 (send m
:included included
)
120 (send m
:predictor-names predictor-names
)
121 (send m
:response-name response-name
)
122 (send m
:case-labels case-labels
)
126 (format t
"~S~%" (send m
:doc
))
127 (format t
"X: ~S~%" (send m
:x
))
128 (format t
"Y: ~S~%" (send m
:y
))))
129 (if print
(send m
:display
))
132 (defmeth regression-model-proto
:isnew
()
133 (send self
:needs-computing t
))
135 (defmeth regression-model-proto
:save
()
137 Returns an expression that will reconstruct the regression model."
138 `(regression-model ',(send self
:x
)
140 :intercept
',(send self
:intercept
)
141 :weights
',(send self
:weights
)
142 :included
',(send self
:included
)
143 :predictor-names
',(send self
:predictor-names
)
144 :response-name
',(send self
:response-name
)
145 :case-labels
',(send self
:case-labels
)))
147 ;;; Computing and Display Methods
149 (defmeth regression-model-proto
:compute
()
151 Recomputes the estimates. For internal use by other messages"
152 (let* ((included (if-else (send self
:included
) 1 0))
155 (intercept (send self
:intercept
))
156 (weights (send self
:weights
))
157 (w (if weights
(* included weights
) included
))
158 (m (make-sweep-matrix x y w
)) ;;; ERROR HERE
159 (n (array-dimension x
1))
160 (p (- (array-dimension m
0) 1))
162 (tol (* 0.001 (reduce #'* (mapcar #'standard-deviation
(column-list x
)))))
163 ;; (tol (* 0.001 (apply #'* (mapcar #'standard-deviation (column-list x)))))
166 (sweep-operator m
(iseq 1 n
) tol
)
167 (sweep-operator m
(iseq 0 n
) (cons 0.0 tol
)))))
169 "~%REMOVEME: regr-mdl-prto :compute =~A~%~A~%~A~%~A~%~A~%"
170 sweep-result x y m tss
)
171 (setf (slot-value 'sweep-matrix
) (first sweep-result
))
172 (setf (slot-value 'total-sum-of-squares
) tss
)
173 (setf (slot-value 'residual-sum-of-squares
)
174 (aref (first sweep-result
) p p
))
175 (setf (slot-value 'basis
)
176 (let ((b (remove 0 (second sweep-result
))))
177 (if b
(- (reduce #'-
(reverse b
)) 1)
178 (error "no columns could be swept"))))))
180 (defmeth regression-model-proto
:needs-computing
(&optional set
)
181 "Message args: ( &optional set )
183 If value given, sets the flag for whether (re)computation is needed to
184 update the model fits."
186 (if set
(setf (slot-value 'sweep-matrix
) nil
))
187 (null (slot-value 'sweep-matrix
)))
189 (defmeth regression-model-proto
:display
()
192 Prints the least squares regression summary. Variables not used in the fit
193 are marked as aliased."
194 (let ((coefs (coerce (send self
:coef-estimates
) 'list
))
195 (se-s (send self
:coef-standard-errors
))
197 (p-names (send self
:predictor-names
)))
198 (if (send self
:weights
)
199 (format t
"~%Weighted Least Squares Estimates:~2%")
200 (format t
"~%Least Squares Estimates:~2%"))
201 (when (send self
:intercept
)
202 (format t
"Constant ~10f ~A~%"
203 (car coefs
) (list (car se-s
)))
204 (setf coefs
(cdr coefs
))
205 (setf se-s
(cdr se-s
)))
206 (dotimes (i (array-dimension x
1))
208 ((member i
(send self
:basis
))
209 (format t
"~22a ~10f ~A~%"
210 (select p-names i
) (car coefs
) (list (car se-s
)))
211 (setf coefs
(cdr coefs
) se-s
(cdr se-s
)))
212 (t (format t
"~22a aliased~%" (select p-names i
)))))
214 (format t
"R Squared: ~10f~%" (send self
:r-squared
))
215 (format t
"Sigma hat: ~10f~%" (send self
:sigma-hat
))
216 (format t
"Number of cases: ~10d~%" (send self
:num-cases
))
217 (if (/= (send self
:num-cases
) (send self
:num-included
))
218 (format t
"Number of cases used: ~10d~%" (send self
:num-included
)))
219 (format t
"Degrees of freedom: ~10d~%" (send self
:df
))
222 ;;; Slot accessors and mutators
224 (defmeth regression-model-proto
:doc
(&optional new-doc append
)
225 "Message args: (&optional new-doc)
227 Returns the DOC-STRING as supplied to m.
228 Additionally, with an argument NEW-DOC, sets the DOC-STRING to
229 NEW-DOC. In this setting, when APPEND is T, append NEW-DOC to DOC
230 rather than doing replacement."
232 (when (and new-doc
(stringp new-doc
))
233 (setf (slot-value 'doc
)
242 (defmeth regression-model-proto
:x
(&optional new-x
)
243 "Message args: (&optional new-x)
245 With no argument returns the x matrix as supplied to m. With an
246 argument, NEW-X sets the x matrix to NEW-X and recomputes the
248 (when (and new-x
(matrixp new-x
))
249 (setf (slot-value 'x
) new-x
)
250 (send self
:needs-computing t
))
253 (defmeth regression-model-proto
:y
(&optional new-y
)
254 "Message args: (&optional new-y)
256 With no argument returns the y sequence as supplied to m. With an
257 argument, NEW-Y sets the y sequence to NEW-Y and recomputes the
261 (typep new-y
'sequence
)))
262 (let ((mat-y (coerce-seq-to-1d-col-matrix new-y
)))
263 (setf (slot-value 'y
) new-y
)
264 (send self
:needs-computing t
)))
267 (defmeth regression-model-proto
:intercept
(&optional
(val nil set
))
268 "Message args: (&optional new-intercept)
270 With no argument returns T if the model includes an intercept term,
271 nil if not. With an argument NEW-INTERCEPT the model is changed to
272 include or exclude an intercept, according to the value of
275 (setf (slot-value 'intercept
) val
)
276 (send self
:needs-computing t
))
277 (slot-value 'intercept
))
279 (defmeth regression-model-proto
:weights
(&optional
(new-w nil set
))
280 "Message args: (&optional new-w)
282 With no argument returns the weight sequence as supplied to m; NIL
283 means an unweighted model. NEW-W sets the weights sequence to NEW-W
284 and recomputes the estimates."
286 (setf (slot-value 'weights
) new-w
)
287 (send self
:needs-computing t
))
288 (slot-value 'weights
))
290 (defmeth regression-model-proto
:total-sum-of-squares
()
293 Returns the total sum of squares around the mean."
294 (if (send self
:needs-computing
) (send self
:compute
))
295 (slot-value 'total-sum-of-squares
))
297 (defmeth regression-model-proto
:residual-sum-of-squares
()
300 Returns the residual sum of squares for the model."
301 (if (send self
:needs-computing
) (send self
:compute
))
302 (slot-value 'residual-sum-of-squares
))
304 (defmeth regression-model-proto
:basis
()
307 Returns the indices of the variables used in fitting the model, in a
309 (if (send self
:needs-computing
)
310 (send self
:compute
))
311 (if (typep (slot-value 'basis
) 'sequence
)
313 (list (slot-value 'basis
))))
316 (defmeth regression-model-proto
:sweep-matrix
()
319 Returns the swept sweep matrix. For internal use"
320 (if (send self
:needs-computing
)
321 (send self
:compute
))
322 (slot-value 'sweep-matrix
))
324 (defmeth regression-model-proto
:included
(&optional new-included
)
325 "Message args: (&optional new-included)
327 With no argument, NIL means a case is not used in calculating
328 estimates, and non-nil means it is used. NEW-INCLUDED is a sequence
329 of length of y of nil and t to select cases. Estimates are
331 (when (and new-included
332 (= (length new-included
) (send self
:num-cases
)))
333 (setf (slot-value 'included
) (copy-seq new-included
))
334 (send self
:needs-computing t
))
335 (if (slot-value 'included
)
336 (slot-value 'included
)
337 (repeat t
(send self
:num-cases
))))
339 (defmeth regression-model-proto
:predictor-names
(&optional
(names nil set
))
340 "Message args: (&optional (names nil set))
342 With no argument returns the predictor names. NAMES sets the names."
343 (if set
(setf (slot-value 'predictor-names
) (mapcar #'string names
)))
344 (let ((p (array-dimension (send self
:x
) 1))
345 (p-names (slot-value 'predictor-names
)))
346 (if (not (and p-names
(= (length p-names
) p
)))
347 (setf (slot-value 'predictor-names
)
348 (mapcar #'(lambda (a) (format nil
"Variable ~a" a
))
350 (slot-value 'predictor-names
))
352 (defmeth regression-model-proto
:response-name
(&optional
(name "Y" set
))
353 "Message args: (&optional name)
355 With no argument returns the response name. NAME sets the name."
357 (if set
(setf (slot-value 'response-name
) (if name
(string name
) "Y")))
358 (slot-value 'response-name
))
360 (defmeth regression-model-proto
:case-labels
(&optional
(labels nil set
))
361 "Message args: (&optional labels)
362 With no argument returns the case-labels. LABELS sets the labels."
363 (if set
(setf (slot-value 'case-labels
)
365 (mapcar #'string labels
)
366 (mapcar #'(lambda (x) (format nil
"~d" x
))
367 (iseq 0 (- (send self
:num-cases
) 1))))))
368 (slot-value 'case-labels
))
372 ;;; None of these methods access any slots directly.
375 (defmeth regression-model-proto
:num-cases
()
377 Returns the number of cases in the model."
378 (length (send self
:y
)))
380 (defmeth regression-model-proto
:num-included
()
382 Returns the number of cases used in the computations."
383 (sum (if-else (send self
:included
) 1 0)))
385 (defmeth regression-model-proto
:num-coefs
()
387 Returns the number of coefficients in the fit model (including the
388 intercept if the model includes one)."
389 (if (send self
:intercept
)
390 (+ 1 (length (send self
:basis
)))
391 (length (send self
:basis
))))
393 (defmeth regression-model-proto
:df
()
395 Returns the number of degrees of freedom in the model."
396 (- (send self
:num-included
) (send self
:num-coefs
)))
398 (defmeth regression-model-proto
:x-matrix
()
400 Returns the X matrix for the model, including a column of 1's, if
401 appropriate. Columns of X matrix correspond to entries in basis."
402 (let ((m (select (send self
:x
)
403 (iseq 0 (- (send self
:num-cases
) 1))
404 (send self
:basis
))))
405 (if (send self
:intercept
)
406 (bind-columns (repeat 1 (send self
:num-cases
)) m
)
409 (defmeth regression-model-proto
:leverages
()
411 Returns the diagonal elements of the hat matrix."
412 (let* ((weights (send self
:weights
))
413 (x (send self
:x-matrix
))
415 (matmult (* (matmult x
(send self
:xtxinv
)) x
)
416 (repeat 1 (send self
:num-coefs
)))))
417 (if weights
(* weights raw-levs
) raw-levs
)))
419 (defmeth regression-model-proto
:fit-values
()
421 Returns the fitted values for the model."
422 (matmult (send self
:x-matrix
) (send self
:coef-estimates
)))
424 (defmeth regression-model-proto
:raw-residuals
()
426 Returns the raw residuals for a model."
427 (- (send self
:y
) (send self
:fit-values
)))
429 (defmeth regression-model-proto
:residuals
()
431 Returns the raw residuals for a model without weights. If the model
432 includes weights the raw residuals times the square roots of the weights
434 (let ((raw-residuals (send self
:raw-residuals
))
435 (weights (send self
:weights
)))
436 (if weights
(* (sqrt weights
) raw-residuals
) raw-residuals
)))
438 (defmeth regression-model-proto
:sum-of-squares
()
440 Returns the error sum of squares for the model."
441 (send self
:residual-sum-of-squares
))
443 (defmeth regression-model-proto
:sigma-hat
()
445 Returns the estimated standard deviation of the deviations about the
447 (let ((ss (send self
:sum-of-squares
))
448 (df (send self
:df
)))
449 (if (/= df
0) (sqrt (/ ss df
)))))
451 ;; for models without an intercept the 'usual' formula for R^2 can give
452 ;; negative results; hence the max.
453 (defmeth regression-model-proto
:r-squared
()
455 Returns the sample squared multiple correlation coefficient, R squared, for
457 (max (- 1 (/ (send self
:sum-of-squares
) (send self
:total-sum-of-squares
)))
460 (defmeth regression-model-proto
:coef-estimates
()
463 Returns the OLS (ordinary least squares) estimates of the regression
464 coefficients. Entries beyond the intercept correspond to entries in
466 (let ((n (array-dimension (send self
:x
) 1))
467 (indices (flatten-list
468 (if (send self
:intercept
)
469 (list 0 (+ 1 (send self
:basis
))) ;; was cons -- why?
470 (list (+ 1 (send self
:basis
))))))
471 (m (send self
:sweep-matrix
)))
472 (format t
"~%REMOVEME2: Coef-ests: ~A ~% ~A ~% ~A ~% ~A"
473 m n indices
(send self
:basis
))
474 (coerce (compound-data-seq (select m
(+ 1 n
) indices
)) 'list
)))
476 (defmeth regression-model-proto
:xtxinv
()
478 Returns ((X^T) X)^(-1) or ((X^T) W X)^(-1)."
479 (let ((indices (if (send self
:intercept
)
480 (cons 0 (1+ (send self
:basis
)))
481 (1+ (send self
:basis
)))))
482 (select (send self
:sweep-matrix
) indices indices
)))
484 (defmeth regression-model-proto
:coef-standard-errors
()
486 Returns estimated standard errors of coefficients. Entries beyond the
487 intercept correspond to entries in basis."
488 (let ((s (send self
:sigma-hat
)))
489 (if s
(* (send self
:sigma-hat
) (sqrt (diagonal (send self
:xtxinv
)))))))
491 (defmeth regression-model-proto
:studentized-residuals
()
493 Computes the internally studentized residuals for included cases and externally studentized residuals for excluded cases."
494 (let ((res (send self
:residuals
))
495 (lev (send self
:leverages
))
496 (sig (send self
:sigma-hat
))
497 (inc (send self
:included
)))
499 (/ res
(* sig
(sqrt (pmax .00001 (- 1 lev
)))))
500 (/ res
(* sig
(sqrt (+ 1 lev
)))))))
502 (defmeth regression-model-proto
:externally-studentized-residuals
()
504 Computes the externally studentized residuals."
505 (let* ((res (send self
:studentized-residuals
))
506 (df (send self
:df
)))
507 (if-else (send self
:included
)
508 (* res
(sqrt (/ (- df
1) (- df
(^ res
2)))))
511 (defmeth regression-model-proto
:cooks-distances
()
513 Computes Cook's distances."
514 (let ((lev (send self
:leverages
))
515 (res (/ (^
(send self
:studentized-residuals
) 2)
516 (send self
:num-coefs
))))
517 (if-else (send self
:included
) (* res
(/ lev
(- 1 lev
) )) (* res lev
))))
520 (defun plot-points (x y
&rest args
)
522 (declare (ignore x y args
))
523 (error "Graphics not implemented yet."))
525 ;; Can not plot points yet!!
526 (defmeth regression-model-proto
:plot-residuals
(&optional x-values
)
527 "Message args: (&optional x-values)
528 Opens a window with a plot of the residuals. If X-VALUES are not supplied
529 the fitted values are used. The plot can be linked to other plots with the
530 link-views function. Returns a plot object."
531 (plot-points (if x-values x-values
(send self
:fit-values
))
532 (send self
:residuals
)
533 :title
"Residual Plot"
534 :point-labels
(send self
:case-labels
)))
536 (defmeth regression-model-proto
:plot-bayes-residuals
538 "Message args: (&optional x-values)
540 Opens a window with a plot of the standardized residuals and two
541 standard error bars for the posterior distribution of the actual
542 deviations from the line. See Chaloner and Brant. If X-VALUES are not
543 supplied the fitted values are used. The plot can be linked to other
544 plots with the link-views function. Returns a plot object."
546 (let* ((r (/ (send self
:residuals
)
547 (send self
:sigma-hat
)))
548 (d (* 2 (sqrt (send self
:leverages
))))
551 (x-values (if x-values x-values
(send self
:fit-values
)))
552 (p (plot-points x-values r
553 :title
"Bayes Residual Plot"
554 :point-labels
(send self
:case-labels
))))
555 (map 'list
#'(lambda (a b c d
) (send p
:plotline a b c d nil
))
556 x-values low x-values high
)
557 (send p
:adjust-to-data
)