lisp-matrix direct name conversions.
[CommonLispStat.git] / src / stat-models / regression-clos.lisp
blob9fb5d296c11f8d8deff034a99169df3bdf10e61b
1 ;;; -*- mode: lisp -*-
3 ;;; Time-stamp: <2008-10-03 02:07:22 tony>
4 ;;; Creation: <2008-10-03 02:07:10 tony>
5 ;;; Author: AJ Rossini <blindglobe@gmail.com>
6 ;;; Copyright: (c)2007, AJ Rossini. BSD, LLGPL, or GPLv2, depending
7 ;;; on how it arrives.
8 ;;; Purpose: redoing regression in a CLOS based framework. See
9 ;;; regression.lsp for basis of work.
11 ;;; What is this talk of 'release'? Klingons do not make software
12 ;;; 'releases'. Our software 'escapes', leaving a bloody trail of
13 ;;; designers and quality assurance people in its wake.
15 ;;; This organization and structure is new to the 21st Century
16 ;;; version.
18 (in-package :lisp-stat-regression-linear-clos)
20 ;;; Regresion Model CLOS, data structures
22 (defclass regression-model-clos (statistical-model)
23 ((x :initform nil :initarg :x :accessor x)
24 (y :initform nil :initarg :y :accessor y)
25 (included :initform nil :initarg :y :accessor y)
26 (total-sum-of-squares :initform nil :initarg :y :accessor y)
27 (residual-sum-of-squares :initform nil :initarg :y :accessor y)
28 (predictor-names :initform nil :initarg :y :accessor y)
29 (response-name :initform nil :initarg :y :accessor y)
30 (case-labels :initform nil :initarg :y :accessor y)
31 (needs-computing :initform T :initarg :compute? :accessor compute?))
32 (:documentation "Normal Linear Regression Model through CLOS.
33 Historical design based on what was done for LispStat, not modern."))
35 (defclass model-specification ()
36 ((spec-string :initform nil
37 :initarg :specification
38 :accessor :specification)
39 (spec-form :initform nil
40 :initarg :spec-form
41 :accessor :spec-form)
42 (model-class :initform nil))
43 (:documentation "container for mathematical structure"))
45 (defclass bayesian-model-specification (model-specification)
46 ((prior-model-class)
47 (spec-string :initform nil
48 :initarg :specification
49 :accessor :specification)
50 (spec-form :initform nil
51 :initarg :spec-form
52 :accessor :spec-form))
53 (:documentation "adds structure holding priors to the model"))
55 ;;; The following should be self-created based on introspection of
56 ;;; available:
57 ;;; ## inferential technologies (bayesian, frequentist, etc),
58 ;;; ## optimization criteria (likelihood, least-squares, min-entropy,
59 ;;; minimax, etc)
60 ;;; ## simplification macros, i.e. mapping directly to linear
61 ;;; regression and other applications. fast specialized
62 ;;; algorithms for edge cases and narrow conditions.
63 ;;; ##
65 (defparameter *model-class-list*
66 '((linear-regression frequentist)
67 (generalized-linear-regression parametric)
68 (linear-regression bayesian)
69 ()))
73 ;;; Regression model generics and methods
75 (defgeneric regression-model (model-spec data-pointer &key debug)
76 (:documentation "CLOSy framework for regression, using numerics from "))
78 (defmethod regression-model
79 ((regr-inst regression-model-clos)
80 (data-ptr data-pointer)
81 &key debug)
82 "Args: (regr-inst regressino-model-clos)
84 Returns a fitted regression model object. To examine the model further
85 assign the result to a variable and send it messages. Example (data
86 are in file absorbtion.lsp in the sample data directory/folder):
88 (def fit-m (fit (new 'regression-model-clos (list iron aluminum) absorbtion)))
89 (print fit-m)
90 (plot fit-m :feature 'residuals)
92 (let ((x (get-variable-matrix (x regr-inst) data-ptr))
93 (y (get-variable-vector (y regr-inst) data-ptr)))
96 (defmeth regression-model-proto :compute ()
97 "Message args: ()
98 Recomputes the estimates. For internal use by other messages"
99 (let* ((included (if-else (send self :included) 1 0))
100 (x (send self :x))
101 (y (send self :y))
102 (intercept (send self :intercept))
103 (weights (send self :weights))
104 (w (if weights (* included weights) included))
105 (m (make-sweep-matrix x y w))
106 (n (array-dimension x 1))
107 (p (- (array-dimension m 0) 1))
108 (tss (aref m p p))
109 (tol (* 0.001 (reduce #'* (mapcar #'standard-deviation (column-list x)))))
110 ;; (tol (* 0.001 (apply #'* (mapcar #'standard-deviation (column-list x)))))
111 (sweep-result
112 (if intercept
113 (sweep-operator m (iseq 1 n) tol)
114 (sweep-operator m (iseq 0 n) (cons 0.0 tol)))))
115 (setf (slot-value 'sweep-matrix) (first sweep-result))
116 (setf (slot-value 'total-sum-of-squares) tss)
117 (setf (slot-value 'residual-sum-of-squares)
118 (aref (first sweep-result) p p))
119 (setf (slot-value 'basis)
120 (let ((b (remove 0 (second sweep-result))))
121 (if b (- (reduce #'- (reverse b)) 1)
122 (error "no columns could be swept"))))))
127 (defmeth regression-model-proto :display ()
128 "Message args: ()
129 Prints the least squares regression summary. Variables not used in the fit
130 are marked as aliased."
131 (let ((coefs (coerce (send self :coef-estimates) 'list))
132 (se-s (send self :coef-standard-errors))
133 (x (send self :x))
134 (p-names (send self :predictor-names)))
135 (if (send self :weights)
136 (format t "~%Weighted Least Squares Estimates:~2%")
137 (format t "~%Least Squares Estimates:~2%"))
138 (when (send self :intercept)
139 (format t "Constant ~10f ~A~%"
140 (car coefs) (list (car se-s)))
141 (setf coefs (cdr coefs))
142 (setf se-s (cdr se-s)))
143 (dotimes (i (array-dimension x 1))
144 (cond
145 ((member i (send self :basis))
146 (format t "~22a ~10f ~A~%"
147 (select p-names i) (car coefs) (list (car se-s)))
148 (setf coefs (cdr coefs) se-s (cdr se-s)))
149 (t (format t "~22a aliased~%" (select p-names i)))))
150 (format t "~%")
151 (format t "R Squared: ~10f~%" (send self :r-squared))
152 (format t "Sigma hat: ~10f~%" (send self :sigma-hat))
153 (format t "Number of cases: ~10d~%" (send self :num-cases))
154 (if (/= (send self :num-cases) (send self :num-included))
155 (format t "Number of cases used: ~10d~%" (send self :num-included)))
156 (format t "Degrees of freedom: ~10d~%" (send self :df))
157 (format t "~%")))
159 ;;; Slot accessors and mutators
162 (defmeth regression-model-proto :included (&optional new-included)
163 "Message args: (&optional new-included)
164 With no argument, NIL means a case is not used in calculating estimates, and non-nil means it is used. NEW-INCLUDED is a sequence of length of y of nil and t to select cases. Estimates are recomputed."
165 (when (and new-included
166 (= (length new-included) (send self :num-cases)))
167 (setf (slot-value 'included) (copy-seq new-included))
168 (send self :needs-computing t))
169 (if (slot-value 'included)
170 (slot-value 'included)
171 (repeat t (send self :num-cases))))
175 (defmeth regression-model-proto :leverages ()
176 "Message args: ()
177 Returns the diagonal elements of the hat matrix."
178 (let* ((weights (send self :weights))
179 (x (send self :x-matrix))
180 (raw-levs
181 (matmult (* (matmult x (send self :xtxinv)) x)
182 (repeat 1 (send self :num-coefs)))))
183 (if weights (* weights raw-levs) raw-levs)))
186 (defmeth regression-model-proto :raw-residuals ()
187 "Message args: ()
188 Returns the raw residuals for a model."
189 (- (send self :y) (send self :fit-values)))
191 (defmeth regression-model-proto :residuals ()
192 "Message args: ()
193 Returns the raw residuals for a model without weights. If the model
194 includes weights the raw residuals times the square roots of the weights
195 are returned."
196 (let ((raw-residuals (send self :raw-residuals))
197 (weights (send self :weights)))
198 (if weights (* (sqrt weights) raw-residuals) raw-residuals)))
201 (defmeth regression-model-proto :sigma-hat ()
202 "Message args: ()
203 Returns the estimated standard deviation of the deviations about the
204 regression line."
205 (let ((ss (send self :sum-of-squares))
206 (df (send self :df)))
207 (if (/= df 0) (sqrt (/ ss df)))))
210 (defmeth regression-model-proto :coef-estimates ()
211 "Message args: ()
212 Returns the OLS (ordinary least squares) estimates of the regression
213 coefficients. Entries beyond the intercept correspond to entries in basis."
214 (let ((n (array-dimension (send self :x) 1))
215 (indices (if (send self :intercept)
216 (cons 0 (+ 1 (send self :basis)))
217 (+ 1 (send self :basis))))
218 (m (send self :sweep-matrix)))
219 (coerce (compound-data-seq (select m (+ 1 n) indices)) 'list)))
221 (defmeth regression-model-proto :xtxinv ()
222 "Message args: ()
223 Returns ((X^T) X)^(-1) or ((X^T) W X)^(-1)."
224 (let ((indices (if (send self :intercept)
225 (cons 0 (1+ (send self :basis)))
226 (1+ (send self :basis)))))
227 (select (send self :sweep-matrix) indices indices)))
230 (defmeth regression-model-proto :studentized-residuals ()
231 "Message args: ()
232 Computes the internally studentized residuals for included cases and externally studentized residuals for excluded cases."
233 (let ((res (send self :residuals))
234 (lev (send self :leverages))
235 (sig (send self :sigma-hat))
236 (inc (send self :included)))
237 (if-else inc
238 (/ res (* sig (sqrt (pmax .00001 (- 1 lev)))))
239 (/ res (* sig (sqrt (+ 1 lev)))))))
241 (defmeth regression-model-proto :externally-studentized-residuals ()
242 "Message args: ()
243 Computes the externally studentized residuals."
244 (let* ((res (send self :studentized-residuals))
245 (df (send self :df)))
246 (if-else (send self :included)
247 (* res (sqrt (/ (- df 1) (- df (^ res 2)))))
248 res)))
250 (defmeth regression-model-proto :cooks-distances ()
251 "Message args: ()
252 Computes Cook's distances."
253 (let ((lev (send self :leverages))
254 (res (/ (^ (send self :studentized-residuals) 2)
255 (send self :num-coefs))))
256 (if-else (send self :included) (* res (/ lev (- 1 lev) )) (* res lev))))
260 (defmeth regression-model-proto :plot-bayes-residuals
261 (&optional x-values)
262 "Message args: (&optional x-values)
263 Opens a window with a plot of the standardized residuals and two standard
264 error bars for the posterior distribution of the actual deviations from the
265 line. See Chaloner and Brant. If X-VALUES are not supplied the fitted values
266 are used. The plot can be linked to other plots with the link-views function.
267 Returns a plot object."
268 (let* ((r (/ (send self :residuals) (send self :sigma-hat)))
269 (d (* 2 (sqrt (send self :leverages))))
270 (low (- r d))
271 (high (+ r d))
272 (x-values (if x-values x-values (send self :fit-values)))
273 (p (plot-points x-values r
274 :title "Bayes Residual Plot"
275 :point-labels (send self :case-labels))))
276 (map 'list #'(lambda (a b c d) (send p :plotline a b c d nil))
277 x-values low x-values high)
278 (send p :adjust-to-data)