3 ;;; Copyright (c) 2008--, by A.J. Rossini <blindglobe@gmail.com>
4 ;;; See COPYRIGHT file for any additional restrictions (BSD license).
5 ;;; Since 1991, ANSI was finally finished. Modified to match ANSI
9 ;;;; regression.lsp XLISP-STAT regression model proto and methods
10 ;;;; XLISP-STAT 2.1 Copyright (c) 1990, by Luke Tierney
11 ;;;; Additions to Xlisp 2.1, Copyright (c) 1989 by David Michael Betz
12 ;;;; You may give out copies of this software; for conditions see the file
13 ;;;; COPYING included with this distribution.
15 ;;;; Incorporates modifications suggested by Sandy Weisberg.
17 ;;; This version uses lisp-matrix for underlying numerics.
19 (in-package :lisp-stat-regression-linear
)
21 ;;; Regresion Model Prototype
23 ;; The general strategy behind the fitting of models using prototypes
24 ;; is that we need to think about want the actual fits are, and then
25 ;; the fits can be used to recompute as components are changes. One
26 ;; catch here is that we'd like some notion of trace-ability, in
27 ;; particular, there is not necessarily a fixed way to take care of the
28 ;; audit trail. save-and-die might be a means of recording the final
29 ;; approach, but we are challenged by the problem of using advice and
30 ;; other such features to capture stages and steps that are considered
31 ;; along the goals of estimating a model.
33 (defclass regression-model-class
() ; (statistical-model)
38 :accessor response-variable
40 :documentation
"vector-like containing univariate response values."
45 :accessor design-matrix
47 :documentation
"matrix-like containing design/data matrix. If
48 interceptp is T, assume first column is intercept (ones), and hence
55 :documentation
"When T, treat first column of design/data matrix
56 special as intercept fit.")
61 :initarg
:covariate-names
62 :accessor covariate-names
64 :documentation
"List of strings representing covariate names.
65 Might include symbols at some point")
68 :initarg
:response-name
69 :accessor response-name
81 (:documentation
"Normal Linear Regression Model with CLOS.
82 Historical design based on LispStat."))
85 (defclass regression-model-fit-class
()
91 :type regression-model-class
)
96 :initarg
:needs-computing
97 :accessor needs-computing
)
98 ;; above modified to TRUE when the following are changed
111 :initarg
:weight-types
112 :accessor weight-types
115 ;; computational artifacts for storage; when NEEDS-COMPUTING is
116 ;; NIL, these are up-to-date.
126 (estimates-covariance-matrix
128 :initarg
:estimates-covariance-matrix
129 :accessor covariance-matrix
131 (total-sum-of-squares
135 (residual-sum-of-squares
141 :initform
"No info given."
145 (:documentation
"Normal Linear Regression Model _FIT_ through CLOS."))
147 ;;;;;;;; Helper functions
149 (defun xtxinv (x &key
(intercept T
))
152 X is NxP, resulting in PxP. Represents Var[\hat\beta], the varest for
153 \hat \beta from Y = X \beta + \eps. Done by Cholesky decomposition,
154 with LAPACK's dpotri routine, factorizing with dpotrf.
157 (let ((m1 (rand 7 5)))
160 (check-type x matrix-like
)
161 (let ((myx (if intercept
162 (bind2 (ones (matrix-dimension x
0) 1)
165 (minv-cholesky (m* (transpose myx
) myx
))))
167 ;; might add args: (method 'gelsy), or do we want to put a more
168 ;; general front end, linear-least-square, across the range of
170 (defun lm (x y
&key
(intercept T
) rcond
)
171 "fit the linear model:
174 and estimate \beta. X,Y should be in cases-by-vars form, i.e. X
175 should be n x p, Y should be n x 1. Returns estimates, n and p.
176 Probably should return a form providing the call, as well.
178 R's lm object returns: coefficients, residuals, effects, rank, fitted,
179 qr-results for numerical considerations, DF_resid. Need to
180 encapsulate into a class or struct."
181 (check-type x matrix-like
)
182 (check-type y vector-like
) ; vector-like might be too strict?
183 (assert (= (nrows y
) (nrows x
)) ; same number of obsns/cases
184 (x y
) "Can not multiply x:~S by y:~S" x y
)
185 (let ((x1 (if intercept
186 (bind2 (ones (matrix-dimension x
0) 1)
189 (let ((betahat (gelsy (m* (transpose x1
) x1
)
190 (m* (transpose x1
) y
)
192 (* (coerce (expt 2 -
52) 'double-float
)
198 (* (coerce (expt 2 -
52) 'double-float
)
202 (list betahat
; LA-SIMPLE-VECTOR-DOUBLE
203 betahat1
; LA-SLICE-VECVIEW-DOUBLE
204 (xtxinv x1
); (sebetahat betahat x y) ; TODO: write me!
205 (nrows x
) ; surrogate for n
206 (ncols x1
) ; surrogate for p
207 (v- (first betahat
) (first betahat1
)) ))))
209 (defun regression-model
210 (x y
&key
(intercept T
)
211 (predictor-names nil
) (response-name nil
) (case-labels nil
)
212 (doc "Undocumented Regression Model Instance"))
213 "Args: (x y &key (intercept T) (print T) (weights nil)
214 included predictor-names response-name case-labels)
215 X - list of independent variables or X matrix
216 Y - dependent variable.
217 INTERCEPT - T to include (default), NIL for no intercept
218 PRINT - if not NIL print summary information
219 WEIGHTS - if supplied should be the same length as Y; error
221 assumed to be inversely proportional to WEIGHTS
222 PREDICTOR-NAMES, RESPONSE-NAME, CASE-LABELS
223 - sequences of strings or symbols.
224 INCLUDED - if supplied, should be length Y or 1, with
225 elements nil to skip or T to include for computing estimates
226 (always included in residual analysis).
227 Returns a regression model object."
228 (check-type x matrix-like
)
229 (check-type y vector-like
)
231 (make-instance 'regression-model-class
234 :interceptp intercept
235 :case-labels case-labels
236 :covariate-names predictor-names
237 :response-name response-name
241 (defun fit-model (model &key
(included T
) (wgts nil
) (docs "No Docs"))
242 (let ((result (make-instance 'regression-model-fit-class
247 :estimates
(first (lm (design-matrix model
)
248 (response-variable model
)
249 :intercept
(interceptp model
)))
250 :estimates-covariance-matrix
251 (xtxinv (design-matrix model
)
252 :intercept
(interceptp model
))
256 (defmethod print-object ((obj regression-model-fit-class
) stream
)
257 (print-unreadable-object (obj stream
:type t
)
258 (format stream
"Estimates: ~%~A~%" (estimates obj
))
259 (format stream
"Covariance: ~%~A~%" (covariance-matrix obj
))
260 (format stream
"Doc: ~%~A~%" (doc-string obj
))))
262 (defmethod print-object ((obj regression-model-class
) stream
)
263 "Need better formatting and output -- clearly this is a WRONG EXAMPLE."
264 (print-unreadable-object (obj stream
:type t
)
265 (format stream
"Response: ~%~A~%" (response-variable obj
))
266 (format stream
"Design: ~%~A~%" (design-matrix obj
))
267 (format stream
"Covariates: ~%~A~%" (covariate-names obj
))
268 (format stream
"Doc: ~%~A~%" (doc-string obj
))))
270 ;;; Computing and Display Methods
275 ;; so with (= (dim X) (list n p))
276 ;; we end up with p x p p x 1
279 ;; and this can be implemented by
281 (setf XY
(bind2 X Y
:by
:row
))
282 (setf XYtXY
(m* (transpose XY
) XY
))
284 ;; which is too procedural. Sigh, I meant
286 (setf XYtXY
(let ((XY (bind2 X Y
:by
:row
)))
287 (m* (transpose XY
) XY
)))
289 ;; which at least looks lispy.