From 0ed7799741d84c87a1ca520786234871e2a68e2a Mon Sep 17 00:00:00 2001 From: AJ Rossini Date: Thu, 4 Dec 2008 08:10:29 +0100 Subject: [PATCH] start of lisp-matrix conversion. Generic functions for statistical processing. --- src/describe/statistics.lsp | 96 ++++++++++++++++++++++++++++------------- src/packages.lisp | 12 ++---- src/stat-models/regression.lisp | 89 ++++++++++++++++++++++++++++++-------- 3 files changed, 142 insertions(+), 55 deletions(-) diff --git a/src/describe/statistics.lsp b/src/describe/statistics.lsp index a4cb499..747453c 100644 --- a/src/describe/statistics.lsp +++ b/src/describe/statistics.lsp @@ -1,5 +1,5 @@ ;;; -*- mode: lisp -*- -;;; Copyright (c) 2005--2008, by A.J. Rossini +;;; Copyright (c) 2005--2009, by A.J. Rossini ;;; See COPYRIGHT file for any additional restrictions (BSD license). ;;; Since 1991, ANSI was finally finished. Edited for ANSI Common Lisp. @@ -16,9 +16,73 @@ ;;; Basic Summary Statistics ;;; -(defun standard-deviation (x) +(defgeneric mean (x) + (:documentation "compute the mean of lists, vectors, various objects") + (:method ((x list)) + (/ (reduce #'+ x) + (length x))) + (:method ((x vector-like)) + (let ((n (nelts x)) + (type (if (column-vector-p x) :column :row))) + (/ (gemm x (ones + (ecase type (:row 1) (:column n)) + (ecase type (:row n) (:column 1)))) + n)))) + +(defun mean-fn (x) "Args: (x) -Returns the standard deviation of the elements x. Vector reducing." +Returns the mean of the elements x. Vector reducing. + +FIXME: Why is this so complex? When figure out, put into generic." + (let ((mean 0.0) + (count 0.0)) + (labels ((add-to-mean (x) + (let ((count+1 (+ count 1.0))) + (setf mean (+ (* (/ count count+1) mean) (* (/ count+1) x))) + (setf count count+1))) + (find-mean (x) + (if (numberp x) + (add-to-mean x) + (let ((seq (compound-data-seq x))) + (if (consp seq) + (dolist (x seq) + (if (numberp x) (add-to-mean x) (find-mean x))) + (let ((n (length seq))) + (dotimes (i n) + (declare (fixnum i)) + (let ((x (aref seq i))) + (if (numberp x) + (add-to-mean x) + (find-mean x)))))))))) + (find-mean x) + mean))) + + +;; We do the variance, since the SD is simply the root. +(defgeneric variance (x) + (:documentation "compute the variance of the entity X, i.e. if a + scalar, vector, or matrix.") + (:method ((x list)) + (let ((n (length x)) + (r (- x (mean x)))) + (sqrt (* (mean (* r r)) (/ n (- n 1)))))) + (:method ((x vector-like)))) + +(defgeneric standard-deviation (x) + (:documentation "Compute standard deivation of entity X.") + (:method ((x vector-like)) (sqrt (variance x))) + (:method ((x list)) + ;; if elements are not numeric, error, otherwise + (sqrt (variance x))) + (:method ((x matrix-like)) + (error "FIXME: define SD for matrix-like objects"))) + +(defun standard-deviation-fn (x) +"Args: (x) +Returns the standard deviation of the elements x. Vector reducing. + +FIXME AJR: This should be redone as square-root of the variance, and +defer to that structure for computation." (let ((n (count-elements x)) (r (- x (mean x)))) (sqrt (* (mean (* r r)) (/ n (- n 1)))))) @@ -99,29 +163,3 @@ without replacement." -(defun mean (x) -"Args: (x) -Returns the mean of the elements x. Vector reducing." - (let ((mean 0.0) - (count 0.0)) - (labels ((add-to-mean (x) - (let ((count+1 (+ count 1.0))) - (setf mean (+ (* (/ count count+1) mean) (* (/ count+1) x))) - (setf count count+1))) - (find-mean (x) - (if (numberp x) - (add-to-mean x) - (let ((seq (compound-data-seq x))) - (if (consp seq) - (dolist (x seq) - (if (numberp x) (add-to-mean x) (find-mean x))) - (let ((n (length seq))) - (dotimes (i n) - (declare (fixnum i)) - (let ((x (aref seq i))) - (if (numberp x) - (add-to-mean x) - (find-mean x)))))))))) - (find-mean x) - mean))) - diff --git a/src/packages.lisp b/src/packages.lisp index 028f06c..1834cac 100644 --- a/src/packages.lisp +++ b/src/packages.lisp @@ -1,6 +1,6 @@ ;;; -*- mode: lisp -*- -;;; Time-stamp: <2008-11-25 08:31:31 tony> +;;; Time-stamp: <2008-12-03 07:49:59 tony> ;;; Creation: <2008-03-11 19:18:34 user> ;;; File: packages.lisp ;;; Author: AJ Rossini @@ -132,12 +132,13 @@ ;; could be value, otherwise might be reference. data-pointer)) +#| (defpackage :lisp-stat-regression-linear-clos (:use :common-lisp :lisp-matrix :lisp-stat-data-clos) (:export regression-model)) - +|# ;;; USER PACKAGES @@ -278,13 +279,6 @@ :lisp-stat-math :lisp-stat-compound-data :lisp-matrix -#| - ;; redone within lisp-matrix -- will need to have a package for - ;; any leftovers... - :lisp-stat-matrix - :lisp-stat-linalg-data - :lisp-stat-linalg -|# :lisp-stat-basics) (:shadowing-import-from :lisp-stat-math ;; life is a vector! expt + - * / ** mod rem abs 1+ 1- log exp sqrt sin cos tan diff --git a/src/stat-models/regression.lisp b/src/stat-models/regression.lisp index 7fab075..bfef870 100644 --- a/src/stat-models/regression.lisp +++ b/src/stat-models/regression.lisp @@ -51,16 +51,17 @@ "Normal Linear Regression Model") -(defun regression-model (x y &key - (intercept T) - (print T) - (weights nil) - (included (repeat t (length y))) - predictor-names - response-name - case-labels - (doc "Undocumented Regression Model Instance") - (debug T)) +(defun regression-model-old + (x y &key + (intercept T) + (print T) + (weights nil) + (included (repeat t (length y))) + predictor-names + response-name + case-labels + (doc "Undocumented Regression Model Instance") + (debug T)) "Args: (x y &key (intercept T) (print T) (weights nil) included predictor-names response-name case-labels) X - list of independent variables or X matrix @@ -111,6 +112,59 @@ Example (data are in file absorbtion.lsp in the sample data directory): (if print (send m :display)) m)) + + +(defun regression-model ;; assumes x/y from lisp-matrix -- start of a set of generics. + (x y &key + (intercept T) + (print T) + (weights nil) + (included (repeat t (length y))) + predictor-names + response-name + case-labels + (doc "Undocumented Regression Model Instance") + (debug T)) + "Args: (x y &key (intercept T) (print T) (weights nil) + included predictor-names response-name case-labels) +X - list of independent variables or X matrix +Y - dependent variable. +INTERCEPT - T to include (default), NIL for no intercept +PRINT - if not NIL print summary information +WEIGHTS - if supplied should be the same length as Y; error + variances are + assumed to be inversely proportional to WEIGHTS +PREDICTOR-NAMES, RESPONSE-NAME, CASE-LABELS + - sequences of strings or symbols. +INCLUDED - if supplied should be the same length as Y, with + elements nil to skip a in computing estimates (but not + in residual analysis). +Returns a regression model object. To examine the model further assign the +result to a variable and send it messages. +Example (data are in file absorbtion.lsp in the sample data directory): + (def m (regression-model (list iron aluminum) absorbtion)) + (send m :help) (send m :plot-residuals)" + (let ((m (send regression-model-proto :new))) + (format t "~%") + (send m :doc doc) + (send m :x x) + (send m :y y) + (send m :intercept intercept) + (send m :weights weights) + (send m :included included) + (send m :predictor-names predictor-names) + (send m :response-name response-name) + (send m :case-labels case-labels) + (if debug + (progn + (format t "~%") + (format t "~S~%" (send m :doc)) + (format t "X: ~S~%" (send m :x)) + (format t "Y: ~S~%" (send m :y)))) + (if print (send m :display)) + m)) + + (defmeth regression-model-proto :isnew () (send self :needs-computing t)) @@ -141,7 +195,8 @@ Recomputes the estimates. For internal use by other messages" (n (matrix-dimension x 1)) (p (- (matrix-dimension m 0) 1)) (tss (mref m p p)) - (tol (* 0.001 (reduce #'* (mapcar #'standard-deviation (column-list x))))) + (tol (* 0.001 + (reduce #'* (mapcar #'standard-deviation (column-list x))))) (sweep-result (if intercept (sweep-operator m (iseq 1 n) tol) @@ -239,7 +294,7 @@ With no argument returns the y vector-like as supplied to m. With an argument, NEW-Y sets the y vector-like to NEW-Y and recomputes the estimates." (when (and new-y - (typep new-y vector-like)) + (typep new-y 'vector-like)) (setf (slot-value 'y) new-y) ;; fixme -- pls set slot value to a vector-like! (send self :needs-computing t)) (slot-value 'y)) @@ -263,7 +318,7 @@ With no argument returns the weight vector-like as supplied to m; NIL means an unweighted model. NEW-W sets the weights vector-like to NEW-W and recomputes the estimates." (when (and set - (typep new-w vector-like)) + (typep new-w 'vector-like)) (setf (slot-value 'weights) new-w) (send self :needs-computing t)) (slot-value 'weights)) @@ -396,14 +451,14 @@ Returns the diagonal elements of the hat matrix." (let* ((weights (send self :weights)) (x (send self :x-matrix)) (raw-levs - (matmult (* (matmult x (send self :xtxinv)) x) - (repeat 1 (send self :num-coefs))))) + (m* (* (m* x (send self :xtxinv)) x) + (repeat 1 (send self :num-coefs))))) (if weights (* weights raw-levs) raw-levs))) (defmeth regression-model-proto :fit-values () "Message args: () Returns the fitted values for the model." - (matmult (send self :x-matrix) (send self :coef-estimates))) + (m* (send self :x-matrix) (send self :coef-estimates))) (defmeth regression-model-proto :raw-residuals () "Message args: () @@ -470,7 +525,7 @@ Returns ((X^T) X)^(-1) or ((X^T) W X)^(-1)." Returns estimated standard errors of coefficients. Entries beyond the intercept correspond to entries in basis." (let ((s (send self :sigma-hat))) - (if s (* (send self :sigma-hat) (sqrt (diagonal (send self :xtxinv))))))) + (if s (* (send self :sigma-hat) (sqrt (diagonalf (send self :xtxinv))))))) (defmeth regression-model-proto :studentized-residuals () "Message args: () -- 2.11.4.GIT