3 ;;; Copyright (c) 2008--, by A.J. Rossini <blindglobe@gmail.com>
4 ;;; See COPYRIGHT file for any additional restrictions (BSD license).
5 ;;; Since 1991, ANSI was finally finished. Modified to match ANSI
8 ;;; This version uses LAPACK and lisp-matrix for underlying numerics.
9 ;;; No longer contains code from XLispStat.
11 (in-package :lisp-stat-regression-linear
)
13 ;; The general strategy behind the fitting of models using prototypes
14 ;; is that we need to think about want the actual fits are, and then
15 ;; the fits can be used to recompute as components are changes. One
16 ;; catch here is that we'd like some notion of trace-ability, in
17 ;; particular, there is not necessarily a fixed way to take care of the
18 ;; audit trail. save-and-die might be a means of recording the final
19 ;; approach, but we are challenged by the problem of using advice and
20 ;; other such features to capture stages and steps that are considered
21 ;; along the goals of estimating a model.
23 ;; In migrating the thought process to R, we use more of a cross
24 ;; between the S3 and prototype approaches, but some of the thinking
27 ;; statistical-model shuld contain some of the ingredients for
28 ;; traceability and embodiment of statistical characteristics.
30 (defclass regression-model-class
() ; (statistical-model)
34 :accessor dependent-response
36 :documentation
"univariate responses/dependent var.")
40 :accessor design-matrix
42 :documentation
"design/data matrix, complete. when interceptp T,
43 first column is intercept (ones), and hence special.")
49 :documentation
"special indicator of first column status.")
54 :initarg
:covariate-names
55 :accessor covariate-names
57 :documentation
"List of strings representing covariate names.
58 Might include symbols at some point")
61 :initarg
:response-name
62 :accessor response-name
74 (:documentation
"Normal Linear Regression Model with CLOS."))
76 (defclass regression-model-fit-class
()
81 :type regression-model-class
82 :documentation
"data/design")
85 :initarg
:needs-computing
86 :accessor needs-computing
87 :documentation
"ensure that this is set to TRUE when numerically
88 dependent pieces are modified (not metadata, for example). Check
89 before access of results and printouts, so as to recompute if
103 :initarg
:weight-types
104 :accessor weight-types
107 ;; computational artifacts for storage; when NEEDS-COMPUTING is
108 ;; NIL, these are up-to-date.
118 (estimates-covariance-matrix
120 :initarg
:estimates-covariance-matrix
121 :accessor covariance-matrix
123 (total-sum-of-squares
127 (residual-sum-of-squares
132 :initform
"No info given."
136 (:documentation
"Normal Linear Regression Model _FIT_ through CLOS."))
138 ;;;;;;;; Helper functions
140 ;;(declaim (ftype (function (matrix-like) matrix-like) xtxinv))
145 X is NxP, resulting in PxP. Represents Var[\hat\beta], the varest for
146 \hat \beta from Y = X \beta + \eps. Done by Cholesky decomposition,
147 with LAPACK's dpotri routine, factorizing with dpotrf.
150 (let ((m1 (rand 7 5)))
153 (check-type x matrix-like
)
154 (minv-cholesky (m* (transpose x
) x
)))
157 (let ((myx (if intercept
158 (bind2 (ones (matrix-dimension x
0) 1)
163 ;; might add args: (method 'gelsy), or do we want to put a more
164 ;; general front end, linear-least-square, across the range of
166 (defun lm (x y
&key
(intercept T
) rcond
)
167 "fit the linear model:
170 and estimate \beta. X,Y should be in cases-by-vars form, i.e. X
171 should be n x p, Y should be n x 1. Returns estimates, n and p.
172 Probably should return a form providing the call, as well.
174 R's lm object returns: coefficients, residuals, effects, rank, fitted,
175 qr-results for numerical considerations, DF_resid. Need to
176 encapsulate into a class or struct."
177 (check-type x matrix-like
)
178 (check-type y vector-like
) ; vector-like might be too strict?
179 (assert (= (nrows y
) (nrows x
)) ; same number of obsns/cases
180 (x y
) "Can not multiply x:~S by y:~S" x y
)
181 (let ((x1 (if intercept
182 (bind2 (ones (matrix-dimension x
0) 1)
185 (let ((betahat (gelsy (m* (transpose x1
) x1
)
186 (m* (transpose x1
) y
)
188 (* (coerce (expt 2 -
52) 'double-float
)
194 (* (coerce (expt 2 -
52) 'double-float
)
198 (list betahat
; LA-SIMPLE-VECTOR-DOUBLE
199 betahat1
; LA-SLICE-VECVIEW-DOUBLE
200 (xtxinv x1
); (sebetahat betahat x y) ; TODO: write me!
201 (nrows x
) ; surrogate for n
202 (ncols x1
) ; surrogate for p
203 (v- (first betahat
) (first betahat1
)) ))))
205 (defun regression-model
206 (x y
&key
(intercept T
)
207 (predictor-names nil
) (response-name nil
) (case-labels nil
)
208 (doc "Undocumented Regression Model Instance"))
209 "Args: (x y &key (intercept T) (print T) (weights nil)
210 included predictor-names response-name case-labels)
211 X - list of independent variables or X matrix
212 Y - dependent variable.
213 INTERCEPT - T to include (default), NIL for no intercept
214 PRINT - if not NIL print summary information
215 WEIGHTS - if supplied should be the same length as Y; error
217 assumed to be inversely proportional to WEIGHTS
218 PREDICTOR-NAMES, RESPONSE-NAME, CASE-LABELS
219 - sequences of strings or symbols.
220 INCLUDED - if supplied, should be length Y or 1, with
221 elements nil to skip or T to include for computing estimates
222 (always included in residual analysis).
223 Returns a regression model object."
224 (check-type x matrix-like
)
225 (check-type y vector-like
)
227 (make-instance 'regression-model-class
230 :interceptp intercept
231 :case-labels case-labels
232 :covariate-names predictor-names
233 :response-name response-name
237 (defun fit-model (model &key
(included T
) (wgts nil
) (docs "No Docs"))
238 (let ((result (make-instance 'regression-model-fit-class
243 :estimates
(first (lm (design-matrix model
)
244 (dependent-response model
)
245 :intercept
(interceptp model
)))
246 :estimates-covariance-matrix
247 (xtxinv (design-matrix model
))
251 (defmethod print-object ((obj regression-model-fit-class
) stream
)
252 (print-unreadable-object (obj stream
:type t
)
253 (format stream
"Estimates: ~%~A~%" (estimates obj
))
254 (format stream
"Covariance: ~%~A~%" (covariance-matrix obj
))
255 (format stream
"Doc: ~%~A~%" (doc-string obj
))))
257 (defmethod print-object ((obj regression-model-class
) stream
)
258 "Need better formatting and output -- clearly this is a WRONG EXAMPLE."
259 (print-unreadable-object (obj stream
:type t
)
260 (format stream
"Response: ~%~A~%" (dependent-response obj
))
261 (format stream
"Design: ~%~A~%" (design-matrix obj
))
262 (format stream
"Covariates: ~%~A~%" (covariate-names obj
))
263 (format stream
"Doc: ~%~A~%" (doc-string obj
))))
265 ;;; Computing and Display Methods
270 ;; so with (= (dim X) (list n p))
271 ;; we end up with p x p p x 1
274 ;; and this can be implemented by
276 (setf XY
(bind2 X Y
:by
:row
))
277 (setf XYtXY
(m* (transpose XY
) XY
))
279 ;; which is too procedural. Sigh, I meant
281 (setf XYtXY
(let ((XY (bind2 X Y
:by
:row
)))
282 (m* (transpose XY
) XY
)))
284 ;; which at least looks lispy.