merged what little useful structure still existed in the clos droppings
[CommonLispStat.git] / src / stat-models / regression.lisp
blobeb453ef69367d9eda987aeed130adf7648447ddb
1 ;;; -*- mode: lisp -*-
2 ;;;
3 ;;; Copyright (c) 2008--, by A.J. Rossini <blindglobe@gmail.com>
4 ;;; See COPYRIGHT file for any additional restrictions (BSD license).
5 ;;; Since 1991, ANSI was finally finished. Modified to match ANSI
6 ;;; Common Lisp.
8 ;;;; Originally from:
9 ;;;; regression.lsp XLISP-STAT regression model proto and methods
10 ;;;; XLISP-STAT 2.1 Copyright (c) 1990, by Luke Tierney
11 ;;;; Additions to Xlisp 2.1, Copyright (c) 1989 by David Michael Betz
12 ;;;; You may give out copies of this software; for conditions see the file
13 ;;;; COPYING included with this distribution.
14 ;;;;
15 ;;;; Incorporates modifications suggested by Sandy Weisberg.
17 ;;; This version uses lisp-matrix for underlying numerics.
19 (in-package :lisp-stat-regression-linear)
21 ;;; Regression Model Prototype
23 ;; The general strategy behind the fitting of models using prototypes
24 ;; is that we need to think about what the actual fits are, and then
25 ;; the fits can be used to recompute as components are changed. One
26 ;; catch here is that we'd like some notion of trace-ability, in
27 ;; particular, there is not necessarily a fixed way to take care of the
28 ;; audit trail. save-and-die might be a means of recording the final
29 ;; approach, but we are challenged by the problem of using advice and
30 ;; other such features to capture stages and steps that are considered
31 ;; along the goals of estimating a model.
33 ;; Note that the above is a stream-of-consciousness response to the
34 ;; challenge of reproducibility in the setting of prototype "on-line"
35 ;; computation.
(defvar regression-model-proto nil
  "Prototype for all regression model instances.")

;; DEFPROTO takes, in order: the prototype name, the list of instance
;; slots, the list of shared slots, the parent prototype(s) and the
;; documentation string.  The (empty) shared-slot list must be present so
;; that *OBJECT* lands in the parents position and the string in the
;; documentation position.
(defproto regression-model-proto
    '(x y intercept betahat basis weights
      included
      total-sum-of-squares
      residual-sum-of-squares
      predictor-names
      response-name
      case-labels
      doc)
  ()                                    ; no shared slots
  *object*
  "Normal Linear Regression Model")
(defclass regression-model-store (statistical-model)
  ;; Every slot gets an initarg and an accessor named after the slot
  ;; itself.  Previously six of them shared `:initarg :y :accessor y',
  ;; so the Y accessor and initarg were ambiguous and the other slots
  ;; could not be initialised or read individually.
  ((x :initform nil :initarg :x :accessor x)
   (y :initform nil :initarg :y :accessor y)
   (included :initform nil
             :initarg :included
             :accessor included)
   (total-sum-of-squares :initform nil
                         :initarg :total-sum-of-squares
                         :accessor total-sum-of-squares)
   (residual-sum-of-squares :initform nil
                            :initarg :residual-sum-of-squares
                            :accessor residual-sum-of-squares)
   (predictor-names :initform nil
                    :initarg :predictor-names
                    :accessor predictor-names)
   (response-name :initform nil
                  :initarg :response-name
                  :accessor response-name)
   (case-labels :initform nil
                :initarg :case-labels
                :accessor case-labels)
   ;; Starts out as T; set through :COMPUTE?, read through COMPUTE?.
   (needs-computing :initform T :initarg :compute? :accessor compute?))
  (:documentation "Normal Linear Regression Model through CLOS.
Historical design based on what was done for LispStat, not modern."))
(defclass model-specification ()
  ;; NOTE(review): the accessors are named by keyword symbols
  ;; (:SPECIFICATION, :SPEC-FORM), i.e. they are called as
  ;; (:specification obj) -- confirm this is intended.
  ((spec-string
    :accessor :specification
    :initarg :specification
    :initform nil)
   (spec-form
    :accessor :spec-form
    :initarg :spec-form
    :initform nil)
   ;; No initarg or accessor: only reachable through SLOT-VALUE.
   (model-class
    :initform nil))
  (:documentation "container for mathematical structure"))
(defclass bayesian-model-specification (model-specification)
  ;; PRIOR-MODEL-CLASS is the only slot that is new here; it has no
  ;; initform, initarg or accessor.
  ((prior-model-class)
   ;; SPEC-STRING and SPEC-FORM repeat the options already given in
   ;; MODEL-SPECIFICATION.
   (spec-string
    :accessor :specification
    :initarg :specification
    :initform nil)
   (spec-form
    :accessor :spec-form
    :initarg :spec-form
    :initform nil))
  (:documentation "adds structure holding priors to the model"))
89 ;;; The following should be self-created based on introspection of
90 ;;; available:
91 ;;; ## inferential technologies (bayesian, frequentist, etc),
92 ;;; ## optimization criteria (likelihood, least-squares, min-entropy,
93 ;;; minimax, etc)
94 ;;; ## simplification macros, i.e. mapping directly to linear
95 ;;; regression and other applications. fast specialized
96 ;;; algorithms for edge cases and narrow conditions.
97 ;;; ##
(defparameter *model-class-list*
  '((linear-regression             frequentist)
    (generalized-linear-regression parametric)
    (linear-regression             bayesian)
    ())                                 ; trailing empty entry kept as-is
  "List of two-symbol entries; the final entry is the empty list.
NOTE(review): each entry looks like (model-family inferential-approach)
-- confirm against the code that consumes this list.")
105 ;;;;; More mischief from a different time
108 ;; regression-model is the old API, but regression as a generic will
109 ;; be the new API. We need to distinguish between APIs which enable
110 ;; the user to do clear activities, and APIs which enable developers
111 ;; to do clear extensions and development, and underlying
112 ;; infrastructure to keep everything straight and enabled.
114 ;; There are conflicting theories for how to structure the
115 ;; specification of mathematical models, along with the statistical
116 ;; inference, along with the data which is instantiating the model.
118 ;; i.e.: mathematical model for the relationships between components,
119 ;; between a component and a summarizing parameter, and between
120 ;; parameters.
122 ;; statistical inference describes the general approach for
123 ;; aggregating into a decision and has implications for the scale up
124 ;; from the model on a single instance to the generalization.
126 ;; The data represents the particular substantive context that is
127 ;; driving the model/inference combination, and about which we hope to
128 ;; generate knowledge.
130 ;; numerical analysis selects appropriate algorithms/implementations
131 ;; for combining the above 3.
133 ;; the end result is input on the decision being made (which could be
134 ;; specific (decision analysis/testing), risk-analysis (interval
135 ;; estimation), or most likely/appropriate selection (point estimation)).
140 ;;;;;;;; Helper functions
(defun xtxinv (x)
  "In:  X
Out: (XtX)^-1

X is NxP, so the result is PxP.  It represents Var[beta-hat], the
variances of beta-hat in Y = X beta + eps.  The inverse is obtained by
Cholesky decomposition: factorize with LAPACK's dpotrf, then invert with
dpotri.

<example>
  (let ((m1 (rand 7 5)))
     (xtxinv m1))
</example>"
  (check-type x matrix-like)
  ;; Form the cross product X'X first, then hand it to the Cholesky inverter.
  (let ((cross-product (m* (transpose x) x)))
    (minv-cholesky cross-product)))
159 ;; might add args: (method 'gelsy), or do we want to put a more
160 ;; general front end, linear-least-square, across the range of
161 ;; LAPACK solvers?
(defun lm (x y &optional rcond (intercept T))
  "Fit the linear model

     y = x beta + e

and estimate beta.  X and Y should be in cases-by-vars form, i.e. X
should be n x p and Y should be n x 1.

RCOND is handed to GELSY as its third argument; when it is NIL the
default 2^-52 * (max (nrows x1) (ncols y)) is used, where x1 is the
matrix that is actually fitted.  When INTERCEPT is true (the default) a
column of ones is bound in front of X to form x1; otherwise x1 is X.

Returns a list of five elements:
  1. the GELSY result for the normal equations (x1'x1, x1'y),
  2. the GELSY result for (x1, y) directly,
  3. (xtxinv x1),
  4. (nrows x), a surrogate for n,
  5. (ncols x1), a surrogate for p.
Probably should return a form providing the call, as well.

R's lm object returns: coefficients, residuals, effects, rank, fitted,
qr-results for numerical considerations, DF_resid.  Need to
encapsulate into a class or struct."
  (check-type x matrix-like)
  (check-type y vector-like) ; vector-like might be too strict?
                             ; maybe matrix-like?
  (assert (= (nrows y) (nrows x)) ; same number of observations/cases
          (x y) "Can not multiply x:~S by y:~S" x y)
  (let* ((x1 (if intercept
                 (bind2 (ones (matrix-dimension x 0) 1)
                        x :by :column)
                 x))
         ;; One tolerance shared by both solves.
         ;; NOTE(review): the default uses (ncols y); (ncols x1) may have
         ;; been intended -- confirm before changing.
         (tolerance (or rcond
                        (* (coerce (expt 2 -52) 'double-float)
                           (max (nrows x1)
                                (ncols y)))))
         (x1-transposed (transpose x1))
         ;; Solve via the normal equations ...
         (betahat (gelsy (m* x1-transposed x1)
                         (m* x1-transposed y)
                         tolerance))
         ;; ... and directly on the design matrix and the response.
         (betahat1 (gelsy x1 y tolerance)))
    ;; need computation for SEs,
    (list betahat     ; LA-SIMPLE-VECTOR-DOUBLE
          betahat1    ; LA-SLICE-VECVIEW-DOUBLE
          (xtxinv x1) ; (sebetahat betahat x y) ; TODO: write me!
          (nrows x)   ; surrogate for n
          (ncols x1)  ; surrogate for p
          ;; (v- (first betahat) (first betahat1))
          )))
(defun regression-model
    (x y &key
         (intercept T)
         (print T)
         (weights nil)
         (included (repeat t (vector-dimension y)))
         predictor-names
         response-name
         case-labels
         (doc "Undocumented Regression Model Instance")
         (debug T))
  "Args: (x y &key (intercept T) (print T) (weights nil)
           included predictor-names response-name case-labels doc debug)
X           - matrix-like of independent variables; anything else
              signals an error
Y           - dependent variable, a vector-like; anything else signals
              an error
INTERCEPT   - T to include (default), NIL for no intercept
PRINT       - if not NIL print summary information
WEIGHTS     - if supplied should be the same length as Y; error
              variances are assumed to be inversely proportional to
              WEIGHTS
PREDICTOR-NAMES, RESPONSE-NAME, CASE-LABELS
            - sequences of strings or symbols.
INCLUDED    - if supplied should be the same length as Y, with
              elements nil to skip a case in computing estimates (but
              not in residual analysis).
DOC         - documentation string stored in the model
DEBUG       - if not NIL (default T) print the doc string, X and Y after
              the model has been filled in
Returns a regression model object.  To examine the model further assign the
result to a variable and send it messages.
Example (data are in file absorbtion.lsp in the sample data directory):
  (def m (regression-model (list iron aluminum) absorbtion))
  (send m :help) (send m :plot-residuals)"
  (let ((x (cond
             ((typep x 'matrix-like) x)
             ;; Disabled: assume only numerical vectors -- but we need to
             ;; ensure coercion to float.
             ;;   ((or (typep x 'sequence)
             ;;        (and (consp x)
             ;;             (numberp (car x)))
             ;;        (make-vector (length x) :initial-contents x)))
             (t (error "not matrix-like."))))
        (y (cond
             ((typep y 'vector-like) y)
             ;; Disabled (note that it tests X where Y looks intended):
             ;;   ((and (consp x)
             ;;         (numberp (car x)))
             ;;    (make-vector (length y) :initial-contents y))
             (t (error "not vector-like."))))
        (m (send regression-model-proto :new)))
    (format t "~%")
    ;; Fill the new instance through its own messages.
    (send m :doc doc)
    (send m :x x)
    (send m :y y)
    (send m :intercept intercept)
    (send m :weights weights)
    (send m :included included)
    (send m :predictor-names predictor-names)
    (send m :response-name response-name)
    (send m :case-labels case-labels)
    (when debug
      (format t "~%")
      (format t "~S~%" (send m :doc))
      (format t "X: ~S~%" (send m :x))
      (format t "Y: ~S~%" (send m :y)))
    (when print
      (send m :display))
    ;; Hand the model back to the caller, as the docstring promises.
    m))
(defmeth regression-model-proto :isnew ()
  "Message args: ()
Flags the receiver as needing computation by sending it
:needs-computing with T."
  (send self :needs-computing t))
(defmeth regression-model-proto :save ()
  "Message args: ()
Returns an expression that will reconstruct the regression model: a
call to REGRESSION-MODEL in which every current value appears quoted."
  (flet ((quoted (value)
           ;; Same structure a backquoted ',value produces.
           (list 'quote value)))
    (list 'regression-model
          (quoted (send self :x))
          (quoted (send self :y))
          :intercept (quoted (send self :intercept))
          :weights (quoted (send self :weights))
          :included (quoted (send self :included))
          :predictor-names (quoted (send self :predictor-names))
          :response-name (quoted (send self :response-name))
          :case-labels (quoted (send self :case-labels)))))
293 ;;; Computing and Display Methods
;; [X|Y]t [X|Y]
;;   =  XtX  XtY
;;      YtX  YtY
;; so with (= (dim X) (list n p))
;; we end up with  p x p   p x 1
;;                 1 x p   1 x 1
;;
;; and this can be implemented by
;;
;;   (setf XY (bind2 X Y :by :column))
;;   (setf XYtXY (m* (transpose XY) XY))
;;
;; which is too procedural.  Sigh, I meant
;;
;;   (setf XYtXY (let ((XY (bind2 X Y :by :column)))
;;                 (m* (transpose XY) XY)))
;;
;; which at least looks lispy.
;;
;; The forms above are illustration only and are kept as comments: X and
;; Y are not bound at load time, so evaluating them would abort the
;; load.  [X|Y] places Y beside X, which is what LM in this file does
;; with `:by :column' when it binds the column of ones onto X.
(defmeth regression-model-proto :compute ()
  "Message args: ()
Recomputes the estimates.  For internal use by other messages.

NOTE: the fit itself is not wired in yet.  At present this prints a
debugging trace of its working values and stores placeholder values in
the TOTAL-SUM-OF-SQUARES and RESIDUAL-SUM-OF-SQUARES slots."
  (let* ((included (if-else (send self :included) 1d0 0d0))
         (x (send self :x))
         (y (send self :y))
         (intercept (send self :intercept)) ;; T/nil
         (weights (send self :weights))     ;; vector-like or nil
         ;; NOTE(review): relies on `*' working elementwise on these
         ;; objects -- confirm.
         (w (if weights (* included weights) included))
         (n (matrix-dimension x 0))
         (p (if intercept
                (1- (matrix-dimension x 1))
                (matrix-dimension x 1))) ;; remove intercept from # params -- right?
         (tss 0)
         (res (make-vector (nrows x) :type :column :initial-element 0d0)) ; (compute-residuals y yhat)
         (tol 0.000001
              ;; (* 0.001 (reduce #'* (mapcar #'standard-deviation (list-of-columns x))))
              ))
    ;; Eight values are printed, so the control string needs eight ~A
    ;; directives; `p' previously had none and was printed under the
    ;; `res=' label while `res' itself was dropped.
    (format t
            "~%REMVME: regr-mdl-prto :compute~%x= ~A~%y= ~A~% tss= ~A~% tol= ~A~% w= ~A~% n= ~A~% p= ~A~% res= ~A~%"
            x y tss tol w n p res)

    ;; (send self :beta-coefficents (lm x y)) ;; FIXME!
    ;; (send self :xtxinv (xtxinv x)) ;; not settable?

    (setf (proto-slot-value 'total-sum-of-squares) tss)
    ;; NOTE(review): placeholder value until residuals are really
    ;; computed; the intended expression appears to be the commented
    ;; one below -- confirm.
    (setf (proto-slot-value 'residual-sum-of-squares)
          0d0
          ;; (m* (ones 1 n) (v* res res))
          )))
(defmeth regression-model-proto :needs-computing (&optional set)
  "Message args: ( &optional set )

With a non-NIL SET, clears the BETAHAT slot so that the model counts as
needing (re)computation.  Always returns T when BETAHAT is empty and
NIL otherwise."
  (send self :nop)
  (when set
    (setf (proto-slot-value 'betahat) nil))
  ;; An empty BETAHAT slot is what marks the fit as stale.
  (not (proto-slot-value 'betahat)))
(defmeth regression-model-proto :display ()
  "Message args: ()

Prints the least squares regression summary.  Variables not used in the fit
are marked as aliased.  Returns NIL."
  (let ((coefs (vector-like->list (send self :coef-estimates)))
        (se-s (send self :coef-standard-errors))
        (x (send self :x))
        (p-names (send self :predictor-names)))
    (if (send self :weights)
        (format t "~%Weighted Least Squares Estimates:~2%")
        (format t "~%Least Squares Estimates:~2%"))
    ;; The intercept, when present, is the leading coefficient.
    (when (send self :intercept)
      (format t "Constant ~10f ~A~%"
              (car coefs) (list (car se-s)))
      (setf coefs (cdr coefs))
      (setf se-s (cdr se-s)))
    ;; X is a matrix-like, so ask for its column count with
    ;; MATRIX-DIMENSION as the rest of this file does, rather than with
    ;; ARRAY-DIMENSION.
    (dotimes (i (matrix-dimension x 1))
      (cond
        ((member i (send self :basis))
         (format t "~22a ~10f ~A~%"
                 (select p-names i) (car coefs) (list (car se-s)))
         (setf coefs (cdr coefs) se-s (cdr se-s)))
        (t (format t "~22a aliased~%" (select p-names i)))))
    (format t "~%")
    (format t "R Squared: ~10f~%" (send self :r-squared))
    (format t "Sigma hat: ~10f~%" (send self :sigma-hat))
    (format t "Number of cases: ~10d~%" (send self :num-cases))
    ;; Only mention the used-case count when some cases were excluded.
    (when (/= (send self :num-cases) (send self :num-included))
      (format t "Number of cases used: ~10d~%" (send self :num-included)))
    (format t "Degrees of freedom: ~10d~%" (send self :df))
    (format t "~%")))
387 ;;; Slot accessors and mutators
(defmeth regression-model-proto :doc (&optional new-doc append)
  "Message args: (&optional new-doc append)

Returns the DOC-STRING stored in the model.  When NEW-DOC is a string
it first becomes the new DOC-STRING; if APPEND is also non-NIL, NEW-DOC
is appended to the existing DOC-STRING instead of replacing it.  A
NEW-DOC that is not a string is ignored."
  (send self :nop)
  ;; STRINGP is false for NIL, so a missing NEW-DOC falls through.
  (when (stringp new-doc)
    (let ((updated (if append
                       (concatenate 'string
                                    (proto-slot-value 'doc)
                                    new-doc)
                       new-doc)))
      (setf (proto-slot-value 'doc) updated)))
  (proto-slot-value 'doc))
(defmeth regression-model-proto :x (&optional new-x)
  "Message args: (&optional new-x)

Returns the x matrix-like held by the model.  When NEW-X is supplied
and is a matrix-like it is stored first and the model is flagged as
needing computation; any other NEW-X is ignored."
  (let ((acceptable (and new-x (typep new-x 'matrix-like))))
    (when acceptable
      (setf (proto-slot-value 'x) new-x)
      (send self :needs-computing t)))
  (proto-slot-value 'x))
(defmeth regression-model-proto :y (&optional new-y)
  "Message args: (&optional new-y)

Returns the y vector-like held by the model.  When NEW-Y is supplied
and is a vector-like it is stored first and the model is flagged as
needing computation; any other NEW-Y is ignored."
  (let ((acceptable (and new-y (typep new-y 'vector-like))))
    (when acceptable
      ;; fixme -- pls set slot value to a vector-like!
      (setf (proto-slot-value 'y) new-y)
      (send self :needs-computing t)))
  (proto-slot-value 'y))
(defmeth regression-model-proto :intercept (&optional (val nil set))
  "Message args: (&optional new-intercept)

Returns the stored intercept flag: T if the model includes an intercept
term, NIL if not.  When an argument is supplied (even NIL) it is stored
first and the model is flagged as needing computation."
  ;; SET is the supplied-p flag, so an explicit NIL still counts.
  (when set
    (setf (proto-slot-value 'intercept) val)
    (send self :needs-computing t))
  (proto-slot-value 'intercept))
(defmeth regression-model-proto :weights (&optional (new-w nil set))
  "Message args: (&optional new-w)

With no argument returns the weight vector-like as supplied to m; NIL
means an unweighted model.  When an argument is supplied (even NIL) it
is stored as the new weights and the model is flagged as needing
computation.  NEW-W is not type-checked."
  (when set
    ;; Disabled validation, kept for reference (probably need to use
    ;; "check-type" or similar?):
    ;;   (and set nil
    ;;        (or (= new-w nil)
    ;;            (typep new-w 'vector-like)))
    (setf (proto-slot-value 'weights) new-w)
    (send self :needs-computing t))
  (proto-slot-value 'weights))
(defmeth regression-model-proto :total-sum-of-squares ()
  "Message args: ()

Returns the value of the TOTAL-SUM-OF-SQUARES slot (the total sum of
squares around the mean), sending :compute first when the model reports
that it needs computing."
  (when (send self :needs-computing)
    (send self :compute))
  (proto-slot-value 'total-sum-of-squares))
(defmeth regression-model-proto :residual-sum-of-squares ()
  "Message args: ()

Returns the value of the RESIDUAL-SUM-OF-SQUARES slot for the model,
sending :compute first when the model reports that it needs computing."
  (when (send self :needs-computing)
    (send self :compute))
  (proto-slot-value 'residual-sum-of-squares))
(defmeth regression-model-proto :basis ()
  "Message args: ()

Returns the value of the BASIS slot: the indices of the variables used
in fitting the model, in a sequence.  Sends :compute first when the
model reports that it needs computing."
  (when (send self :needs-computing)
    (send self :compute))
  (proto-slot-value 'basis))
(defmeth regression-model-proto :included (&optional new-included)
  "Message args: (&optional new-included)

Returns the inclusion sequence: NIL means a case is not used in
calculating estimates, and non-nil means it is used.  When no sequence
has been stored, one of all T's, as long as the number of cases, is
returned.

A non-NIL NEW-INCLUDED is copied into the model first and the model is
flagged as needing computation.  NEW-INCLUDED should be a sequence of
nil and t of the same length as y; that length is not checked here."
  (when new-included
    ;; NOTE(review): the length test below used to be evaluated with
    ;; its result thrown away, so it never guarded the update.  It is
    ;; kept as a comment; decide whether a mismatch should be rejected.
    ;;   (= (length new-included) (send self :num-cases))
    (setf (proto-slot-value 'included) (copy-seq new-included))
    (send self :needs-computing t))
  (or (proto-slot-value 'included)
      (repeat t (send self :num-cases))))
(defmeth regression-model-proto :predictor-names (&optional (names nil set))
  "Message args: (&optional (names nil set))

Returns the predictor names.  When NAMES is supplied, each element is
converted with STRING and stored first.  If the stored names are then
empty, or their count differs from the number of columns of x, they are
replaced by the defaults \"Variable 0\", \"Variable 1\", ..."
  (when set
    (setf (proto-slot-value 'predictor-names) (mapcar #'string names)))
  (let* ((column-count (matrix-dimension (send self :x) 1))
         (current (proto-slot-value 'predictor-names))
         (usable (and current (= (length current) column-count))))
    (unless usable
      (setf (proto-slot-value 'predictor-names)
            (mapcar #'(lambda (a) (format nil "Variable ~a" a))
                    (iseq 0 (- column-count 1))))))
  (proto-slot-value 'predictor-names))
(defmeth regression-model-proto :response-name (&optional (name "Y" set))
  "Message args: (&optional name)

Returns the stored response name.  When NAME is supplied it is stored
first, converted with STRING; a supplied NIL stores \"Y\"."
  (send self :nop)
  (when set
    (let ((label (if name (string name) "Y")))
      (setf (proto-slot-value 'response-name) label)))
  (proto-slot-value 'response-name))
(defmeth regression-model-proto :case-labels (&optional (labels nil set))
  "Message args: (&optional labels)
Returns the stored case-labels.  When LABELS is supplied it is stored
first, each element converted with STRING; a supplied NIL stores the
default labels \"0\", \"1\", ... up to the number of cases minus one."
  (when set
    (setf (proto-slot-value 'case-labels)
          (cond
            (labels (mapcar #'string labels))
            (t (mapcar #'(lambda (x) (format nil "~d" x))
                       (iseq 0 (- (send self :num-cases) 1)))))))
  (proto-slot-value 'case-labels))
536 ;;; Other Methods
537 ;;; None of these methods access any slots directly.
(defmeth regression-model-proto :num-cases ()
  "Message args: ()
Returns the number of cases in the model, taken as the number of
elements of y."
  ;; # cases in data, must accommodate weights or masking!
  (let ((response (send self :y)))
    (nelts response)))
(defmeth regression-model-proto :num-included ()
  "Message args: ()
Returns the number of cases used in the computations: the inclusion
sequence is mapped to 1 (included) or 0 (excluded) and summed."
  (let ((indicators (if-else (send self :included) 1 0)))
    (sum indicators)))
(defmeth regression-model-proto :num-coefs ()
  "Message args: ()
Returns the number of coefficients in the fit model: the number of
columns of x, plus one if the model includes an intercept."
  (let ((intercept-terms (if (send self :intercept) 1 0)))
    (+ intercept-terms (ncols (send self :x)))))
(defmeth regression-model-proto :df ()
  "Message args: ()
Returns the number of degrees of freedom in the model: the number of
included cases minus the number of coefficients."
  (let* ((cases-used (send self :num-included))
         (coefficients (send self :num-coefs)))
    (- cases-used coefficients)))
(defmeth regression-model-proto :x-matrix ()
  "Message args: ()
Returns the X matrix for the model, including a column of 1's, if
appropriate.  Columns of X matrix correspond to entries in basis."
  ;; Pick every case (row) and only the basis columns of x.
  (let ((basis-columns (select (send self :x)
                               (iseq 0 (- (send self :num-cases) 1))
                               (send self :basis))))
    (cond
      ((send self :intercept)
       ;; Put the constant term in front of the selected columns.
       (bind2 (repeat 1 (send self :num-cases)) basis-columns))
      (t basis-columns))))
(defmeth regression-model-proto :leverages ()
  "Message args: ()
Returns the diagonal elements of the hat matrix.  When the model has
weights, the raw leverages are combined with the weights."
  (let* ((x (send self :x-matrix))
         ;; The middle product needs X as its second operand; without it
         ;; the form is malformed.
         ;; NOTE(review): the XLISP-STAT original multiplied
         ;; (X (XtX)^-1) by X elementwise and then summed each row by
         ;; multiplying with a vector of ones.  Confirm that M* is the
         ;; right operator for the middle step, and likewise for the
         ;; weights below.
         (raw-levs
           (m* (m* (m* x
                       (send self :xtxinv))
                   x)
               (repeat 1 (send self :num-coefs)))))
    (if (send self :weights)
        (m* (send self :weights) raw-levs)
        raw-levs)))
(defmeth regression-model-proto :fit-values ()
  "Message args: ()
Returns the fitted values for the model: the product of the model's
X matrix and its coefficient estimates."
  (let* ((design (send self :x-matrix))
         (estimates (send self :coef-estimates)))
    (m* design estimates)))
(defmeth regression-model-proto :raw-residuals ()
  "Message args: ()
Returns the raw residuals for a model: y minus the fitted values."
  (let* ((observed (send self :y))
         (fitted (send self :fit-values)))
    (v- observed fitted)))
(defmeth regression-model-proto :residuals ()
  "Message args: ()
Returns the raw residuals for a model without weights.  If the model
includes weights the raw residuals times the square roots of the weights
are returned."
  (let ((unscaled (send self :raw-residuals))
        (case-weights (send self :weights)))
    (cond
      ;; NOTE(review): relies on `*' and SQRT working elementwise on
      ;; these objects -- confirm.
      (case-weights (* (sqrt case-weights) unscaled))
      (t unscaled))))
(defmeth regression-model-proto :sum-of-squares ()
  "Message args: ()
Returns the error sum of squares for the model; this simply forwards
to :residual-sum-of-squares."
  (send self :residual-sum-of-squares))
(defmeth regression-model-proto :sigma-hat ()
  "Message args: ()
Returns the estimated standard deviation of the deviations about the
regression line, computed as the square root of the error sum of
squares over the degrees of freedom.  Returns NIL when there are zero
degrees of freedom."
  (let ((error-ss (send self :sum-of-squares))
        (dof (send self :df)))
    (unless (= dof 0)
      (sqrt (/ error-ss dof)))))
620 ;; for models without an intercept the 'usual' formula for R^2 can give
621 ;; negative results; hence the max.
(defmeth regression-model-proto :r-squared ()
  "Message args: ()
Returns the sample squared multiple correlation coefficient, R squared, for
the regression, computed as 1 - (error SS / total SS) and floored at 0."
  ;; Without an intercept the usual formula can go negative, so the
  ;; result is clamped at 0; MAX needs that second argument.
  (max (- 1 (/ (send self :sum-of-squares) (send self :total-sum-of-squares)))
       0))
(defmeth regression-model-proto :coef-estimates ()
  "Message args: ()

Intended to return the OLS (ordinary least squares) estimates of the
regression coefficients, with entries beyond the intercept
corresponding to entries in basis.

NOTE: not implemented yet.  At present this prints the model's x with
PRINC and returns it."
  (let ((x (send self :x)))
    (princ x)))

;; Earlier draft of the body, kept for reference only.  It is disabled
;; because outside the method SELF is unbound, and M (the sweep matrix)
;; is not defined anywhere in it:
;;
;;   (let ((n (matrix-dimension (send self :x) 1))
;;         (indices (flatten-list
;;                   (if (send self :intercept)
;;                       (cons 0 (+ 1 (send self :basis)))
;;                       (list (+ 1 (send self :basis))))))
;;         (x (send self :x)))
;;     (format t "~%REMOVEME2: Coef-ests: ~% Sweep Matrix: ~A ~% array dim 1: ~A ~% Swept indices: ~A ~% basis: ~A"
;;             x n indices (send self :basis))
;;     (coerce (compound-data-seq (select m (1+ n) indices)) 'list)) ;; ERROR
(defmeth regression-model-proto :xtxinv ()
  "Message args: ()
Returns ((X^T) X)^(-1), computed by XTXINV from the model's x.
NOTE(review): weights are not used here, so the weighted form
((X^T) W X)^(-1) is not produced."
  ;; The message selector is the keyword :X; a bare X is an unbound
  ;; variable in this method.
  (xtxinv (send self :x)))
(defmeth regression-model-proto :coef-standard-errors ()
  "Message args: ()
Returns estimated standard errors of coefficients: sigma-hat times the
square roots of the diagonal of (XtX)^-1.  Entries beyond the intercept
correspond to entries in basis.  Returns NIL when sigma-hat is NIL."
  (let ((s (send self :sigma-hat)))
    ;; Reuse S rather than sending :sigma-hat a second time.
    ;; NOTE(review): relies on `*' and SQRT working elementwise on the
    ;; result of DIAGONALF -- confirm.
    (when s
      (* s (sqrt (diagonalf (send self :xtxinv)))))))
(defmeth regression-model-proto :studentized-residuals ()
  "Message args: ()
Computes the internally studentized residuals for included cases and
externally studentized residuals for excluded cases."
  (let ((resid (send self :residuals))
        (leverage (send self :leverages))
        (sigma (send self :sigma-hat))
        (in-fit (send self :included)))
    (if-else in-fit
             ;; included cases: res / (sigma * sqrt(1 - lev)), with
             ;; 1 - lev kept at or above .00001
             (/ resid (* sigma (sqrt (max .00001 (- 1 leverage))))) ; vectorize max
             ;; excluded cases: res / (sigma * sqrt(1 + lev))
             (/ resid (* sigma (sqrt (+ 1 leverage)))))))
(defmeth regression-model-proto :externally-studentized-residuals ()
  "Message args: ()
Computes the externally studentized residuals.  Included cases have
their studentized residual rescaled by sqrt((df - 1) / (df - res^2));
excluded cases keep their studentized residual unchanged."
  (let* ((studentized (send self :studentized-residuals))
         (dof (send self :df)))
    (if-else (send self :included)
             (* studentized
                (sqrt (/ (- dof 1)
                         (- dof (v* studentized studentized)))))
             studentized)))
(defmeth regression-model-proto :cooks-distances ()
  "Message args: ()
Computes Cook's distances from the leverages and the squared
studentized residuals divided by the number of coefficients."
  (let ((leverage (send self :leverages))
        (scaled-sq-resid
          (/ (v* (send self :studentized-residuals)
                 (send self :studentized-residuals))
             (send self :num-coefs))))
    (if-else (send self :included)
             ;; included cases: scaled by lev / (1 - lev)
             (* scaled-sq-resid (/ leverage (- 1 leverage)))
             ;; excluded cases: scaled by lev only
             (* scaled-sq-resid leverage))))
(defun plot-points (x y &rest args)
  "Placeholder for the point-plotting entry point; need to fix.
Accepts X, Y and any further ARGS, uses none of them, and always signals
an error because graphics are not implemented yet."
  ;; The arguments are accepted only so that callers keep working once
  ;; plotting exists; declaring them ignored avoids unused-variable
  ;; warnings.
  (declare (ignore x y args))
  (error "Graphics not implemented yet."))
698 ;; Can not plot points yet!!
(defmeth regression-model-proto :plot-residuals (&optional x-values)
  "Message args: (&optional x-values)
Plots the residuals against X-VALUES through PLOT-POINTS, titled
\"Residual Plot\" and labelled with the case labels.  If X-VALUES are
not supplied the fitted values are used.  The plot can be linked to
other plots with the link-views function.  Returns whatever PLOT-POINTS
returns."
  (let ((horizontal (or x-values (send self :fit-values))))
    (plot-points horizontal
                 (send self :residuals)
                 :title "Residual Plot"
                 :point-labels (send self :case-labels))))
(defmeth regression-model-proto :plot-bayes-residuals
    (&optional x-values)
  "Message args: (&optional x-values)

Opens a window with a plot of the standardized residuals and two
standard error bars for the posterior distribution of the actual
deviations from the line.  See Chaloner and Brant.  If X-VALUES are not
supplied the fitted values are used.  The plot can be linked to other
plots with the link-views function.  Returns a plot object."
  ;; NOTE(review): the arithmetic below relies on `/', `*', `-', `+'
  ;; and SQRT working elementwise on these objects -- confirm.
  (let* ((r (/ (send self :residuals)
               (send self :sigma-hat)))
         (d (* 2 (sqrt (send self :leverages))))
         (low (- r d))
         (high (+ r d))
         (x-values (if x-values x-values (send self :fit-values)))
         (p (plot-points x-values r
                         :title "Bayes Residual Plot"
                         :point-labels (send self :case-labels))))
    ;; One vertical bar per case, from (x, low) to (x, high).
    (map 'list #'(lambda (a b c d) (send p :plotline a b c d nil))
         x-values low x-values high)
    (send p :adjust-to-data)
    ;; Return the plot object, as documented.
    p))
736 ;;;; Other code
(defun print-lm (lm-obj)
  "Summary computations for a fitted linear model, transcribed from R.

Work in progress.  For a rank-0 fit it returns a property list with the
keys :ALIASED :RESIDUALS :DF :COEFFICIENTS :SIGMA :R.SQUARED and
:ADJ.R.SQUARED (to be encapsulated in a struct/class instance later).
For any other rank the summary is not written yet and NIL is returned.

NOTE(review): RANK, RESIDUALS, WEIGHTS, COEF, IS.NA and QR are assumed
to be accessors on LM-OBJ that mirror their R namesakes -- confirm they
exist."
  (let ((p (rank lm-obj)))
    (if (= p 0)
        ;; EVIL LOGIC!  Just to store for now.
        (let* ((n (length (residuals lm-obj)))
               (w (if (weights lm-obj)
                      (weights lm-obj)
                      (ones n 1)))
               ;; Weighted fits scale the residuals by sqrt(weights);
               ;; unweighted fits use them as they are.  (The two
               ;; branches used to be the wrong way round, taking the
               ;; square root of the weights only when there were none.)
               (r (if (weights lm-obj)
                      (v.* (residuals lm-obj)
                           (mapcar #'sqrt (weights lm-obj)))
                      (residuals lm-obj)))
               (rss (sum (v.* r r)))
               (resvar (/ rss (- n p)))
               (aliased (is.na (coef lm-obj))))
          (declare (ignorable w))
          ;; then answer, to be encapsulated in a struct/class
          ;; instance,
          (list :aliased aliased
                :residuals r
                :df (list 0 n (length aliased))
                :coefficients (list 'NA 0d0 4d0)
                :sigma (sqrt resvar)
                :r.squared 0d0
                :adj.r.squared 0d0))
        ;; otherwise...
        (let* ((n (nrows (qr lm-obj)))
               (rdf (- n p)))
          (declare (ignorable rdf))
          ;; TODO: the non-degenerate summary has not been written yet.
          nil))))
;; Load-time calls to LM.  They need *XV+1*, *XV* and *Y2* to be bound
;; before this point; none of the three is defined in the part of the
;; file shown here.
(lm *xv+1* *y2*)
(lm (transpose *xv*) *y2*)

;; Announce that the file has been processed.
(princ "Linear Models Code setup")