3 ;;; Time-stamp: <2008-10-03 02:07:22 tony>
4 ;;; Creation: <2008-10-03 02:07:10 tony>
5 ;;; Author: AJ Rossini <blindglobe@gmail.com>
6 ;;; Copyright: (c)2007, AJ Rossini. BSD, LLGPL, or GPLv2, depending
8 ;;; Purpose: redoing regression in a CLOS based framework. See
9 ;;; regression.lsp for basis of work.
11 ;;; What is this talk of 'release'? Klingons do not make software
12 ;;; 'releases'. Our software 'escapes', leaving a bloody trail of
13 ;;; designers and quality assurance people in its wake.
15 ;;; This organization and structure is new to the 21st Century
18 (in-package :lisp-stat-regression-linear-clos
)
20 ;;; Regresion Model CLOS, data structures
22 (defclass regression-model-clos
(statistical-model)
23 ((x :initform nil
:initarg
:x
:accessor x
)
24 (y :initform nil
:initarg
:y
:accessor y
)
25 (included :initform nil
:initarg
:y
:accessor y
)
26 (total-sum-of-squares :initform nil
:initarg
:y
:accessor y
)
27 (residual-sum-of-squares :initform nil
:initarg
:y
:accessor y
)
28 (predictor-names :initform nil
:initarg
:y
:accessor y
)
29 (response-name :initform nil
:initarg
:y
:accessor y
)
30 (case-labels :initform nil
:initarg
:y
:accessor y
)
31 (needs-computing :initform T
:initarg
:compute?
:accessor compute?
))
32 (:documentation
"Normal Linear Regression Model through CLOS.
33 Historical design based on what was done for LispStat, not modern."))
35 (defclass model-specification
()
36 ((spec-string :initform nil
37 :initarg
:specification
38 :accessor
:specification
)
39 (spec-form :initform nil
42 (model-class :initform nil
))
43 (:documentation
"container for mathematical structure"))
45 (defclass bayesian-model-specification
(model-specification)
47 (spec-string :initform nil
48 :initarg
:specification
49 :accessor
:specification
)
50 (spec-form :initform nil
52 :accessor
:spec-form
))
53 (:documentation
"adds structure holding priors to the model"))
55 ;;; The following should be self-created based on introspection of
57 ;;; ## inferential technologies (bayesian, frequentist, etc),
58 ;;; ## optimization criteria (likelihood, least-squares, min-entropy,
60 ;;; ## simplification macros, i.e. mapping directly to linear
61 ;;; regression and other applications. fast specialized
62 ;;; algorithms for edge cases and narrow conditions.
65 (defparameter *model-class-list
*
66 '((linear-regression frequentist
)
67 (generalized-linear-regression parametric
)
68 (linear-regression bayesian
)
73 ;;; Regression model generics and methods
75 (defgeneric regression-model
(model-spec data-pointer
&key debug
)
76 (:documentation
"CLOSy framework for regression, using numerics from "))
78 (defmethod regression-model
79 ((regr-inst regression-model-clos
)
80 (data-ptr data-pointer
)
82 "Args: (regr-inst regressino-model-clos)
84 Returns a fitted regression model object. To examine the model further
85 assign the result to a variable and send it messages. Example (data
86 are in file absorbtion.lsp in the sample data directory/folder):
88 (def fit-m (fit (new 'regression-model-clos (list iron aluminum) absorbtion)))
90 (plot fit-m :feature 'residuals)
92 (let ((x (get-variable-matrix (x regr-inst
) data-ptr
))
93 (y (get-variable-vector (y regr-inst
) data-ptr
)))
96 (defmeth regression-model-proto
:compute
()
98 Recomputes the estimates. For internal use by other messages"
99 (let* ((included (if-else (send self
:included
) 1 0))
102 (intercept (send self
:intercept
))
103 (weights (send self
:weights
))
104 (w (if weights
(* included weights
) included
))
105 (m (make-sweep-matrix x y w
))
106 (n (array-dimension x
1))
107 (p (- (array-dimension m
0) 1))
109 (tol (* 0.001 (reduce #'* (mapcar #'standard-deviation
(column-list x
)))))
110 ;; (tol (* 0.001 (apply #'* (mapcar #'standard-deviation (column-list x)))))
113 (sweep-operator m
(iseq 1 n
) tol
)
114 (sweep-operator m
(iseq 0 n
) (cons 0.0 tol
)))))
115 (setf (slot-value 'sweep-matrix
) (first sweep-result
))
116 (setf (slot-value 'total-sum-of-squares
) tss
)
117 (setf (slot-value 'residual-sum-of-squares
)
118 (aref (first sweep-result
) p p
))
119 (setf (slot-value 'basis
)
120 (let ((b (remove 0 (second sweep-result
))))
121 (if b
(- (reduce #'-
(reverse b
)) 1)
122 (error "no columns could be swept"))))))
127 (defmeth regression-model-proto
:display
()
129 Prints the least squares regression summary. Variables not used in the fit
130 are marked as aliased."
131 (let ((coefs (coerce (send self
:coef-estimates
) 'list
))
132 (se-s (send self
:coef-standard-errors
))
134 (p-names (send self
:predictor-names
)))
135 (if (send self
:weights
)
136 (format t
"~%Weighted Least Squares Estimates:~2%")
137 (format t
"~%Least Squares Estimates:~2%"))
138 (when (send self
:intercept
)
139 (format t
"Constant ~10f ~A~%"
140 (car coefs
) (list (car se-s
)))
141 (setf coefs
(cdr coefs
))
142 (setf se-s
(cdr se-s
)))
143 (dotimes (i (array-dimension x
1))
145 ((member i
(send self
:basis
))
146 (format t
"~22a ~10f ~A~%"
147 (select p-names i
) (car coefs
) (list (car se-s
)))
148 (setf coefs
(cdr coefs
) se-s
(cdr se-s
)))
149 (t (format t
"~22a aliased~%" (select p-names i
)))))
151 (format t
"R Squared: ~10f~%" (send self
:r-squared
))
152 (format t
"Sigma hat: ~10f~%" (send self
:sigma-hat
))
153 (format t
"Number of cases: ~10d~%" (send self
:num-cases
))
154 (if (/= (send self
:num-cases
) (send self
:num-included
))
155 (format t
"Number of cases used: ~10d~%" (send self
:num-included
)))
156 (format t
"Degrees of freedom: ~10d~%" (send self
:df
))
159 ;;; Slot accessors and mutators
162 (defmeth regression-model-proto
:included
(&optional new-included
)
163 "Message args: (&optional new-included)
164 With no argument, NIL means a case is not used in calculating estimates, and non-nil means it is used. NEW-INCLUDED is a sequence of length of y of nil and t to select cases. Estimates are recomputed."
165 (when (and new-included
166 (= (length new-included
) (send self
:num-cases
)))
167 (setf (slot-value 'included
) (copy-seq new-included
))
168 (send self
:needs-computing t
))
169 (if (slot-value 'included
)
170 (slot-value 'included
)
171 (repeat t
(send self
:num-cases
))))
175 (defmeth regression-model-proto
:leverages
()
177 Returns the diagonal elements of the hat matrix."
178 (let* ((weights (send self
:weights
))
179 (x (send self
:x-matrix
))
181 (matmult (* (matmult x
(send self
:xtxinv
)) x
)
182 (repeat 1 (send self
:num-coefs
)))))
183 (if weights
(* weights raw-levs
) raw-levs
)))
186 (defmeth regression-model-proto
:raw-residuals
()
188 Returns the raw residuals for a model."
189 (- (send self
:y
) (send self
:fit-values
)))
191 (defmeth regression-model-proto
:residuals
()
193 Returns the raw residuals for a model without weights. If the model
194 includes weights the raw residuals times the square roots of the weights
196 (let ((raw-residuals (send self
:raw-residuals
))
197 (weights (send self
:weights
)))
198 (if weights
(* (sqrt weights
) raw-residuals
) raw-residuals
)))
201 (defmeth regression-model-proto
:sigma-hat
()
203 Returns the estimated standard deviation of the deviations about the
205 (let ((ss (send self
:sum-of-squares
))
206 (df (send self
:df
)))
207 (if (/= df
0) (sqrt (/ ss df
)))))
210 (defmeth regression-model-proto
:coef-estimates
()
212 Returns the OLS (ordinary least squares) estimates of the regression
213 coefficients. Entries beyond the intercept correspond to entries in basis."
214 (let ((n (array-dimension (send self
:x
) 1))
215 (indices (if (send self
:intercept
)
216 (cons 0 (+ 1 (send self
:basis
)))
217 (+ 1 (send self
:basis
))))
218 (m (send self
:sweep-matrix
)))
219 (coerce (compound-data-seq (select m
(+ 1 n
) indices
)) 'list
)))
221 (defmeth regression-model-proto
:xtxinv
()
223 Returns ((X^T) X)^(-1) or ((X^T) W X)^(-1)."
224 (let ((indices (if (send self
:intercept
)
225 (cons 0 (1+ (send self
:basis
)))
226 (1+ (send self
:basis
)))))
227 (select (send self
:sweep-matrix
) indices indices
)))
230 (defmeth regression-model-proto
:studentized-residuals
()
232 Computes the internally studentized residuals for included cases and externally studentized residuals for excluded cases."
233 (let ((res (send self
:residuals
))
234 (lev (send self
:leverages
))
235 (sig (send self
:sigma-hat
))
236 (inc (send self
:included
)))
238 (/ res
(* sig
(sqrt (pmax .00001 (- 1 lev
)))))
239 (/ res
(* sig
(sqrt (+ 1 lev
)))))))
241 (defmeth regression-model-proto
:externally-studentized-residuals
()
243 Computes the externally studentized residuals."
244 (let* ((res (send self
:studentized-residuals
))
245 (df (send self
:df
)))
246 (if-else (send self
:included
)
247 (* res
(sqrt (/ (- df
1) (- df
(^ res
2)))))
250 (defmeth regression-model-proto
:cooks-distances
()
252 Computes Cook's distances."
253 (let ((lev (send self
:leverages
))
254 (res (/ (^
(send self
:studentized-residuals
) 2)
255 (send self
:num-coefs
))))
256 (if-else (send self
:included
) (* res
(/ lev
(- 1 lev
) )) (* res lev
))))
260 (defmeth regression-model-proto
:plot-bayes-residuals
262 "Message args: (&optional x-values)
263 Opens a window with a plot of the standardized residuals and two standard
264 error bars for the posterior distribution of the actual deviations from the
265 line. See Chaloner and Brant. If X-VALUES are not supplied the fitted values
266 are used. The plot can be linked to other plots with the link-views function.
267 Returns a plot object."
268 (let* ((r (/ (send self
:residuals
) (send self
:sigma-hat
)))
269 (d (* 2 (sqrt (send self
:leverages
))))
272 (x-values (if x-values x-values
(send self
:fit-values
)))
273 (p (plot-points x-values r
274 :title
"Bayes Residual Plot"
275 :point-labels
(send self
:case-labels
))))
276 (map 'list
#'(lambda (a b c d
) (send p
:plotline a b c d nil
))
277 x-values low x-values high
)
278 (send p
:adjust-to-data
)