From f85f70fae16d9a84d2e3661285891013533388e9 Mon Sep 17 00:00:00 2001 From: AJ Rossini Date: Fri, 3 Oct 2008 02:26:42 +0200 Subject: [PATCH] CLOS experiment (data, linear regression) proceeding. --- src/data/data-clos.lisp | 497 ++++++------------------ src/packages.lisp | 36 +- src/stat-models/regression-clos.lisp | 720 ++++++++++++++--------------------- 3 files changed, 414 insertions(+), 839 deletions(-) rewrite src/data/data-clos.lisp (66%) rewrite src/stat-models/regression-clos.lisp (72%) diff --git a/src/data/data-clos.lisp b/src/data/data-clos.lisp dissimilarity index 66% index 251903c..ffc4fd5 100644 --- a/src/data/data-clos.lisp +++ b/src/data/data-clos.lisp @@ -1,381 +1,116 @@ -;;; -*- mode: lisp -*- - -;;; File: data-clos.lisp -;;; Author: AJ Rossini -;;; Copyright: (c)2008, AJ Rossini. BSD, LLGPL, or GPLv2, depending -;;; on how it arrives. -;;; Purpose: data package for lispstat -;;; Time-stamp: <2008-09-02 18:40:53 tony> -;;; Creation: <2008-03-12 17:18:42 user> - -;;; What is this talk of 'release'? Klingons do not make software -;;; 'releases'. Our software 'escapes', leaving a bloody trail of -;;; designers and quality assurance people in its wake. - -;;; This organization and structure is new to the 21st Century -;;; version. - -;;; data-clos.lisp -;;; -;;; redoing data structures in a CLOS based framework. -;;; -;;; No real basis for work, there is a bit of new-ness and R-ness to -;;; this work. In particular, the notion of relation is key and -;;; integral to the analysis. Tables are related and matched -;;; vectors,for example. "column" vectors are related observations -;;; (by measure/recording) while "row" vectors are related readings -;;; (by case) -;;; - -;;; Relational structure -- can we capture a completely unnormalized -;;; data strucutre to propose possible modeling approaches, and -;;; propose appropriate models and inferential strategies? -;;; - -;; verb-driven schema for data collection. Should encode independence -;; or lack of when possible. - -#+nil(progn - (def-statschema MyDB - :tables (list (list t1 ) - (list t2 ) - (list t4 )) - :unique-key key - :stat-relation '(t1 (:nest-within t2) (:nest-within t3)) - :)) - - - -(in-package :cl-user) - -(defpackage :lisp-stat-data-clos - (:use :common-lisp - ;;:clem - ) - (:export statistical-dataset ;; primary class for working. - - modifyData ;; metadata mods - importData ;; get it in - reshapeData ;; data mods - - consistent-statistical-dataset-p - varNames caseNames ;; metadata explicit modifiers - - extract - ;; and later, we remove the following, exposing only - ;; through the above method. - extract-1 extract-row extract-col extract-idx - )) - -(in-package :lisp-stat-data-clos) - -;; Need to figure out typed vectors. We then map a series of typed -;; vectors over to tables where columns are equal typed. In a sense, -;; this is a relation (1-1) of equal-typed arrays. For the most part, -;; this ends up making the R data.frame into a relational building -;; block (considering 1-1 mappings using row ID as a relation). -;; Is this a worthwhile generalization? - -(defclass statistical-dataset-metadata (rdf-type) - (()) - - - ) - -(defclass statistical-dataset () - ((store :initform nil - :initarg :storage - :accessor dataset - :documentation "Data storage: typed as table, array, - relation, or pointer/reference to such.") - (documentation-string :initform nil - :initarg :doc - :accessor doc-string - :documentation "uncomputable information - about statistical-dataset - instance.") - - ;; the rest of this is metadata. In particular, we should find a - ;; more flexible, compact way to store this. - (case-labels :initform nil - :initarg :case-labels - :accessor case-labels - :documentation "labels used for describing cases (doc - metadata), possibly used for merging.") - (var-labels :initform nil - :initarg :var-labels - :accessor var-labels - :documentation "Variable names.")) - (:documentation "Standard Cases by Variables Statistical-Dataset, - i.e. an S data.frame.")) - -;; -;; statistical-dataset is the basic cases by variables framework. -;; Need to embed this within other structures which allow for -;; generalized relations. Goal is to ensure that relations imply and -;; drive the potential for statistical relativeness such as -;; correlation, interference, and similar concepts. -;; -;; Actions on a statistical data structure. -;; - -(defgeneric consistent-statistical-dataset-p (ds) - (:documentation "methods to check for consistency.")) - -(defmethod consistent-statistical-dataset-p ((ds statistical-dataset)) - "Test that statistical-dataset is internally consistent with metadata. -Ensure that dims of stored data are same as case and var labels." - (equal (array-dimensions (dataset ds)) - (list (length (var-labels ds)) - (length (case-labels ds))))) - -;;; Extraction - -(defun extract-1 (sds idx1 idx2) - "Returns a scalar." - (aref (dataset sds) idx1 idx2)) - -(defun extract-1-as-sds (sds idx1 idx2) - "Need a version which returns a dataset." - (make-instance 'statistical-dataset - :storage (make-array - (list 1 1) - :initial-contents (extract-1 sds idx1 idx2)) - ;; ensure copy for this and following - :doc (doc-string sds) - :case-labels (caseNames sds) - :var-labels (varNames sds))) - -(defun gen-seq (n &optional (start 1)) - "There has to be a better way -- I'm sure of it! Always count from 1." - (if (>= n start) - (append (gen-seq (- n 1) start) (list n)))) -;; (gen-seq 4) -;; => (1 2 3 4) -;; (gen-seq 0) -;; => nil -;; (gen-seq 5 3) -;; => 3 4 5 -;; - -(defun extract-col (sds index) - "Returns data as sequence." - (map 'sequence - #'(lambda (x) (extract-1 sds index x)) - (gen-seq (nth 2 (array-dimensions (dataset sds)))))) - -(defun extract-col-as-sds (sds index) - "Returns data as SDS, copied." - (map 'sequence - #'(lambda (x) (extract-1 sds index x)) - (gen-seq (nth 2 (array-dimensions (dataset sds)))))) - -(defun extract-row (sds index) - "Returns row as sequence." - (map 'sequence - #'(lambda (x) (extract-1 sds x index)) - (gen-seq (nth 1 (array-dimensions (dataset sds)))))) - -(defun extract-idx (sds idx1Lst idx2Lst) - "return an array, row X col dims. FIXME TESTME" - (let ((my-pre-array (list))) - (dolist (x idx1Lst) - (dolist (y idx2Lst) - (append my-pre-array (extract-1 sds x y)))) - (make-array (list (length idx1Lst) (length idx2Lst)) - :initial-contents my-pre-array))) - - -(defun extract-idx-sds (sds idx1Lst idx2Lst) - "return a dataset encapsulated version of extract-idx." - (make-instance 'statistical-dataset - :storage (make-array - (list (length idx1Lst) (length idx2Lst)) - :initial-contents (dataset sds)) - ;; ensure copy for this and following - :doc (doc-string sds) - :case-labels (caseNames sds) - :var-labels (varNames sds))) - -(defgeneric extract (sds whatAndRange) - (:documentation "data extraction approach")) - -;;; Printing methods and support. - -(defun print-as-row (seq) - "Print a sequence formated as a row in a table." - (format t "~{~D~T~}" seq)) - -;; (print-as-row (list 1 2 3)) - -(defun print-structure-table (ds) - "example of what we want the methods to look like. Should be sort -of like a spreadsheet if the storage is a table." - (print-as-row (var-labels ds)) - (let ((j -1)) - (dolist (i (case-labels ds)) - (print-as-row (append (list i) - (extract-row (dataset ds) (incf j))))))) - -#| -(defun print-structure-relational (ds) - "example of what we want the methods to look like. Should be sort -of like a graph of spreadsheets if the storage is a relational -structure." - (dolist (k (relations ds)) - (let ((currentRelationSet (getRelation ds k))) - (print-as-row (var-labels currentRelationSet)) - (let ((j -1)) - (dolist (i (case-labels currentRelationSet)) - (print-as-row - (append (list i) - (extract-row (dataset currentRelationSet) - (incf j))))))))) -|# - - -;;; Shaping for computation - -(defgeneric reshapeData (dataform into-form as-copy) - (:documentation "pulling data into a new form")) - -(defmethod reshapeData ((sds statistical-dataset) what into-form)) - -(defmethod reshapeData ((ds array) (sp list) copy-p) - "Array via specList specialization: similar to the common R -approaches to redistribution.") - -(defclass data-format () ()) - -(defun row-order-as-list (ary) - "Pull out data in row order into a list." - (let ((result (list)) - (nrows (nth 0 (array-dimensions ary))) - (ncols (nth 1 (array-dimensions ary)))) - (dotimes (i ncols) - (dotimes (j nrows) - (nappend result (aref ary i j)))))) - -(defun col-order-as-list (ary) - "Pull out data in row order into a list." - (let ((result (list)) - (nrows (nth 0 (array-dimensions ary))) - (ncols (nth 1 (array-dimensions ary)))) - (dotimes (i nrows) - (dotimes (j ncols) - (nappend result (aref ary i j)))))) - -(defun transpose (ary) - "map NxM to MxN." - (make-array (reverse (array-dimensions ary)) - :initial-contents (col-order-as-list ary))) - - -;;; verbs vs semantics for dt conversion -- consider the possibily of -;;; how adverbs and verbs relate, where to put which semantically to -;;; allow for general approach. - -;;; eg. Kasper's talk on the FUSION collection of parsers. - - - - - - -;;; Need to consider modification APIs -;;; actions are: -;;; - import -;;; - get/set row names (case names) -;;; - column names (variable names) -;;; - dataset values -;;; - annotation/metadata -;;; - make sure that we do coherency checking in the exported -;;; - functions. -;;; - ... -;;; - reshapeData/reformat/reshapr a reformed version of the dataset (no -;;; additional input). -;;; - either overwriting or not, i.e. with or without copy. -;;; - check consistency of resulting data with metadata and related -;;; data information. -;;; - - -;;; Variable-name handling for Tables. Needs error checking. -(defun varNames (ds) - (var-labels ds)) - -(defun set-varNames (ds vN) - (if (= (length (var-labels ds)) - (length vN)) - (setf (var-labels ds) vN) - (error "wrong size."))) - -(defsetf varNames set-varNames) - -;;; Case-name handling for Tables. Needs error checking. -(defun caseNames (ds) - (case-labels ds)) - -(defun set-caseNames (ds vN) - (if (= (length (case-labels ds)) - (length vN)) - (setf (case-labels ds) vN) - (error "wrong size."))) - -(defsetf caseNames set-caseNames) - -;;; General modification approaches. - -(defgeneric importData (source featureList) - (:documentation "command to get data into CLS. Specific methods - will need to handle pathnames, internal data structures, and - external services such as DBMS's. We would like to be able to do - thinks like: - (importData MyPathName '(:formattype 'csvString)) - (importData '(sqlConnection :server host.domain.net :port 666) - '(:formattype 'table - and so on.")) - -#| -(defun pathname-example (name) - (let ((my-path (parse-namestring name))) - (values (pathname-name my-path :case :common) - (pathname-name my-path :case :local)))) - -(defvar sourceTypes (list 'csv 'lisp 'tsv 'special) - "list of possible symbols. - -Thsees are used to specify source formats that might be supported for -input. CSV and TSV are standard, LISP refers to forms, and SPECIAL -refers to a FUNCTION which parses as appropriately.") - -;;; WRONG LOGIC. -(defmethod importData ((fileHandle pathname) - (fmt list)) ;sourceTypes)) - "File-based input for data. -Usually used by: - (importData (parse-namestring 'path/to/file') - (list :format 'csv)) - - (importData myPathName (list :format 'lisp)) -." - (let* ((fmtType (getf fmt :format)) - (newData (getDataAsLists fileHandle fmtType))) - (case fmtType - ('csv ( )) - ('tsv ( )) - ('lisp ( )) - ('special (let ((parserFcn (getf fmt :special-parser))))) - (:default (error "no standard default importData format"))))) - -(defmethod importData ((ds array) (fmt list)) - "mapping arrays into CLS data.") - -(defmethod importData ((dsSpec DBMSandSQLextract) - (fmt mappingTypes)) - "mapping DBMS into CLS data.") -|# - - -;;(defmacro with-dataframe (env &rest progn) -;; "Compute using variable names with with.data.frame type semantics.") - +;;; -*- mode: lisp -*- + +;;; Time-stamp: <2008-10-03 02:15:49 tony> +;;; Creation: <2008-03-12 17:18:42 blindglobe@gmail.com> +;;; File: data-clos.lisp +;;; Author: AJ Rossini +;;; Copyright: (c)2008, AJ Rossini. BSD, LLGPL, or GPLv2, depending +;;; on how it arrives. +;;; Purpose: data package for lispstat. redoing data structures +;;; in a CLOS based framework. +;;; +;;; No real basis for work, there is a bit of new-ness and R-ness to +;;; this work. In particular, the notion of relation is key and +;;; integral to the analysis. Tables are related and matched +;;; vectors,for example. "column" vectors are related observations +;;; (by measure/recording) while "row" vectors are related readings +;;; (by case) + + +;;; What is this talk of 'release'? Klingons do not make software +;;; 'releases'. Our software 'escapes', leaving a bloody trail of +;;; designers and quality assurance people in its wake. + +;;; This organization and structure is new to the 21st Century +;;; version. + +(in-package :lisp-stat-data-clos) + +;;; Relational structure -- can we capture a completely unnormalized +;;; data strucutre to propose possible modeling approaches, and +;;; propose appropriate models and inferential strategies? + +;; verb-driven schema for data collection. Should encode independence +;; or lack of when possible. + +#+nil(progn + (def-statschema MyDB + :tables (list (list t1 ) + (list t2 ) + (list t4 )) + :unique-key key + :stat-relation '(t1 (:nest-within t2) (:nest-within t3)) + :)) + +;; Need to figure out typed vectors. We then map a series of typed +;; vectors over to tables where columns are equal typed. In a sense, +;; this is a relation (1-1) of equal-typed arrays. For the most part, +;; this ends up making the R data.frame into a relational building +;; block (considering 1-1 mappings using row ID as a relation). +;; Is this a worthwhile generalization? + +;;; verbs vs semantics for dt conversion -- consider the possibily of +;;; how adverbs and verbs relate, where to put which semantically to +;;; allow for general approach. + +;;; eg. Kasper's talk on the FUSION collection of parsers. + + +;;; Need to consider modification APIs +;;; actions are: +;;; - import +;;; - get/set row names (case names) +;;; - column names (variable names) +;;; - dataset values +;;; - annotation/metadata +;;; - make sure that we do coherency checking in the exported +;;; - functions. +;;; - ... +;;; - reshapeData/reformat/reshapr a reformed version of the dataset (no +;;; additional input). +;;; - either overwriting or not, i.e. with or without copy. +;;; - check consistency of resulting data with metadata and related +;;; data information. +;;; - + +(defclass data-pointer () + ((store :initform nil + :initarg :storage + :accessor dataset + :documentation "Data storage: typed as table, array, + relation, or pointer/reference to such.") + (documentation-string :initform nil + :initarg :doc + :accessor doc-string + :documentation "uncomputable information + about statistical-dataset + instance.") + + ;; the rest of this is metadata. In particular, we should find a + ;; more flexible, compact way to store this. + (case-labels :initform nil + :initarg :case-labels + :accessor case-labels + :documentation "labels used for describing cases (doc + metadata), possibly used for merging.") + (var-labels :initform nil + :initarg :var-labels + :accessor var-labels + :documentation "Variable names.")) + (:documentation "Standard Cases by Variables Statistical-Dataset, + i.e. an S data.frame.")) + + +(defgeneric get-variable-matrix (dataset-pointer list-of-variable-names) + (:documentation "retrieves a matrix whose columns are the variable + names in same order specified.")) + +(defgeneric get-variable-vector (dataset-pointer variable-name)) + +;; statistical-dataset is the basic cases by variables framework. +;; Need to embed this within other structures which allow for +;; generalized relations. Goal is to ensure that relations imply and +;; drive the potential for statistical relativeness such as +;; correlation, interference, and similar concepts. +;; +;; Actions on a statistical data structure. diff --git a/src/packages.lisp b/src/packages.lisp index b261b38..ee7c5e0 100644 --- a/src/packages.lisp +++ b/src/packages.lisp @@ -1,6 +1,6 @@ ;;; -*- mode: lisp -*- -;;; Time-stamp: <2008-09-08 08:28:47 tony> +;;; Time-stamp: <2008-10-03 02:25:41 tony> ;;; Creation: <2008-03-11 19:18:34 user> ;;; File: packages.lisp ;;; Author: AJ Rossini @@ -17,19 +17,6 @@ (in-package :cl-user) - - - -;;; REGRESSION using CLOS structure - -(defpackage :lisp-stat-regression-linear-clos - (:use :common-lisp - :clem ) - - (:export regression-model regression-model-obj x y intercept sweep-matrix - basis weights included total-sum-of-squares residual-sum-of-squares - predictor-names response-name case-labels)) - ;;; LispStat Basics (defpackage :lisp-stat-basics @@ -64,11 +51,26 @@ ;;; - - (defpackage :lisp-stat-macros (:use :common-lisp :lisp-stat-compound-data) (:export make-rv-function make-rv-function-1)) -;;; +;;; NEW CLOS STRUCTURE + +(defpackage :lisp-stat-data-clos + (:use :common-lisp + :lisp-matrix) + (:export get-variable-matrix get-variable-vector + ;; generic container class for data -- if small enough + ;; could be value, otherwise might be reference. + data-pointer)) + +(defpackage :lisp-stat-regression-linear-clos + (:use :common-lisp + :lisp-matrix + :lisp-stat-data-clos) + (:export regression-model regression-model-obj x y intercept sweep-matrix + basis weights included total-sum-of-squares residual-sum-of-squares + predictor-names response-name case-labels)) + diff --git a/src/stat-models/regression-clos.lisp b/src/stat-models/regression-clos.lisp dissimilarity index 72% index 45ec801..9fb5d29 100644 --- a/src/stat-models/regression-clos.lisp +++ b/src/stat-models/regression-clos.lisp @@ -1,441 +1,279 @@ -;;; -*- mode: lisp -*- - -;;; File: data.lisp -;;; Author: AJ Rossini -;;; Copyright: (c)2007, AJ Rossini. BSD, LLGPL, or GPLv2, depending -;;; on how it arrives. -;;; Purpose: data package for lispstat -;;; Time-stamp: <2008-03-11 19:18:48 user> -;;; Creation: <2008-03-11 19:18:34 user> - -;;; What is this talk of 'release'? Klingons do not make software -;;; 'releases'. Our software 'escapes', leaving a bloody trail of -;;; designers and quality assurance people in its wake. - -;;; This organization and structure is new to the 21st Century -;;; version. - -;;; regression-clos.lisp -;;; -;;; redoing regression in a CLOS based framework. -;;; See regression.lsp for basis of work. - -(in-package :cl-user) - -(defpackage :lisp-stat-regression-linear-clos - (:use :common-lisp - :clem ) - - (:export regression-model regression-model-obj x y intercept sweep-matrix - basis weights included total-sum-of-squares residual-sum-of-squares - predictor-names response-name case-labels)) - -(in-package :lisp-stat-regression-linear-clos) - -;;; Regresion Model CLOS - -(defclass regression-model-clos (statistical-model) - ((x :initform nil :initarg :x :accessor x) - (y :initform nil :initarg :y :accessor y) - (included :initform nil :initarg :y :accessor y) - (total-sum-of-squares :initform nil :initarg :y :accessor y) - (residual-sum-of-squares :initform nil :initarg :y :accessor y) - (predictor-names :initform nil :initarg :y :accessor y) - (response-name :initform nil :initarg :y :accessor y) - (case-labels :initform nil :initarg :y :accessor y) - (needs-computing :initform T :initarg :compute? :accessor compute?) - (:documentation "Normal Linear Regression Model through CLOS.")) - -(defmethod fit ((regr-inst regression-model-clos)) - "Args: (regr-inst regressino-model-clos) - -Returns a fitted regression model object. To examine the model further -assign the result to a variable and send it messages. Example (data -are in file absorbtion.lsp in the sample data directory/folder): - (def fit-m (fit (new 'regression-model-clos (list iron aluminum) absorbtion))) - (print fit-m) - (plot fit-m :feature 'residuals)" - (let ((x (cond - ((matrixp x) x) - ((vectorp x) (list x)) - ((and (consp x) (numberp (car x))) (list x)) - (t x))) - (m (send regression-model-proto :new))) - (send m :x (if (matrixp x) x (apply #'bind-columns x))) - (setf (slot-value 'y) y) - (send m :intercept intercept) - (send m :weights weights) - (send m :included included) - (send m :predictor-names predictor-names) - (send m :response-name response-name) - (send m :case-labels case-labels) - (if print (send m :display)) - m)) - -(defmeth regression-model-proto :isnew () - (send self :needs-computing t)) - -(defmeth regression-model-proto :save () -"Message args: () -Returns an expression that will reconstruct the regression model." - `(regression-model ',(send self :x) - ',(send self :y) - :intercept ',(send self :intercept) - :weights ',(send self :weights) - :included ',(send self :included) - :predictor-names ',(send self :predictor-names) - :response-name ',(send self :response-name) - :case-labels ',(send self :case-labels))) - -;;; Computing and Display Methods - -(defmeth regression-model-proto :compute () -"Message args: () -Recomputes the estimates. For internal use by other messages" - (let* ((included (if-else (send self :included) 1 0)) - (x (send self :x)) - (y (send self :y)) - (intercept (send self :intercept)) - (weights (send self :weights)) - (w (if weights (* included weights) included)) - (m (make-sweep-matrix x y w)) - (n (array-dimension x 1)) - (p (- (array-dimension m 0) 1)) - (tss (aref m p p)) - (tol (* 0.001 (reduce #'* (mapcar #'standard-deviation (column-list x))))) - ;; (tol (* 0.001 (apply #'* (mapcar #'standard-deviation (column-list x))))) - (sweep-result - (if intercept - (sweep-operator m (iseq 1 n) tol) - (sweep-operator m (iseq 0 n) (cons 0.0 tol))))) - (setf (slot-value 'sweep-matrix) (first sweep-result)) - (setf (slot-value 'total-sum-of-squares) tss) - (setf (slot-value 'residual-sum-of-squares) - (aref (first sweep-result) p p)) - (setf (slot-value 'basis) - (let ((b (remove 0 (second sweep-result)))) - (if b (- (reduce #'- (reverse b)) 1) - (error "no columns could be swept")))))) - -(defmeth regression-model-proto :needs-computing (&optional set) - ;;(declare (ignore self)) - (if set (setf (slot-value 'sweep-matrix) nil)) - (null (slot-value 'sweep-matrix))) - -(defmeth regression-model-proto :display () -"Message args: () -Prints the least squares regression summary. Variables not used in the fit -are marked as aliased." - (let ((coefs (coerce (send self :coef-estimates) 'list)) - (se-s (send self :coef-standard-errors)) - (x (send self :x)) - (p-names (send self :predictor-names))) - (if (send self :weights) - (format t "~%Weighted Least Squares Estimates:~2%") - (format t "~%Least Squares Estimates:~2%")) - (when (send self :intercept) - (format t "Constant ~10f ~A~%" - (car coefs) (list (car se-s))) - (setf coefs (cdr coefs)) - (setf se-s (cdr se-s))) - (dotimes (i (array-dimension x 1)) - (cond - ((member i (send self :basis)) - (format t "~22a ~10f ~A~%" - (select p-names i) (car coefs) (list (car se-s))) - (setf coefs (cdr coefs) se-s (cdr se-s))) - (t (format t "~22a aliased~%" (select p-names i))))) - (format t "~%") - (format t "R Squared: ~10f~%" (send self :r-squared)) - (format t "Sigma hat: ~10f~%" (send self :sigma-hat)) - (format t "Number of cases: ~10d~%" (send self :num-cases)) - (if (/= (send self :num-cases) (send self :num-included)) - (format t "Number of cases used: ~10d~%" (send self :num-included))) - (format t "Degrees of freedom: ~10d~%" (send self :df)) - (format t "~%"))) - -;;; Slot accessors and mutators - -(defmeth regression-model-proto :x (&optional new-x) -"Message args: (&optional new-x) -With no argument returns the x matrix as supplied to m. With an argument -NEW-X sets the x matrix to NEW-X and recomputes the estimates." - (when (and new-x (matrixp new-x)) - (setf (slot-value 'x) new-x) - (send self :needs-computing t)) - (slot-value 'x)) - -(defmeth regression-model-proto :y (&optional new-y) -"Message args: (&optional new-y) -With no argument returns the y sequence as supplied to m. With an argument -NEW-Y sets the y sequence to NEW-Y and recomputes the estimates." - (when (and new-y (or (matrixp new-y) (sequencep new-y))) - (setf (slot-value 'y) new-y) - (send self :needs-computing t)) - (slot-value 'y)) - -(defmeth regression-model-proto :intercept (&optional (val nil set)) -"Message args: (&optional new-intercept) -With no argument returns T if the model includes an intercept term, nil if -not. With an argument NEW-INTERCEPT the model is changed to include or -exclude an intercept, according to the value of NEW-INTERCEPT." - (when set - (setf (slot-value 'intercept) val) - (send self :needs-computing t)) - (slot-value 'intercept)) - -(defmeth regression-model-proto :weights (&optional (new-w nil set)) -"Message args: (&optional new-w) -With no argument returns the weight sequence as supplied to m; NIL means -an unweighted model. NEW-W sets the weights sequence to NEW-W and -recomputes the estimates." - (when set - (setf (slot-value 'weights) new-w) - (send self :needs-computing t)) - (slot-value 'weights)) - -(defmeth regression-model-proto :total-sum-of-squares () -"Message args: () -Returns the total sum of squares around the mean." - (if (send self :needs-computing) (send self :compute)) - (slot-value 'total-sum-of-squares)) - -(defmeth regression-model-proto :residual-sum-of-squares () -"Message args: () -Returns the residual sum of squares for the model." - (if (send self :needs-computing) (send self :compute)) - (slot-value 'residual-sum-of-squares)) - -(defmeth regression-model-proto :basis () -"Message args: () -Returns the indices of the variables used in fitting the model." - (if (send self :needs-computing) (send self :compute)) - (slot-value 'basis)) - -(defmeth regression-model-proto :sweep-matrix () -"Message args: () -Returns the swept sweep matrix. For internal use" - (if (send self :needs-computing) (send self :compute)) - (slot-value 'sweep-matrix)) - -(defmeth regression-model-proto :included (&optional new-included) -"Message args: (&optional new-included) -With no argument, NIL means a case is not used in calculating estimates, and non-nil means it is used. NEW-INCLUDED is a sequence of length of y of nil and t to select cases. Estimates are recomputed." - (when (and new-included - (= (length new-included) (send self :num-cases))) - (setf (slot-value 'included) (copy-seq new-included)) - (send self :needs-computing t)) - (if (slot-value 'included) - (slot-value 'included) - (repeat t (send self :num-cases)))) - -(defmeth regression-model-proto :predictor-names (&optional (names nil set)) -"Message args: (&optional (names nil set)) -With no argument returns the predictor names. NAMES sets the names." - (if set (setf (slot-value 'predictor-names) (mapcar #'string names))) - (let ((p (array-dimension (send self :x) 1)) - (p-names (slot-value 'predictor-names))) - (if (not (and p-names (= (length p-names) p))) - (setf (slot-value 'predictor-names) - (mapcar #'(lambda (a) (format nil "Variable ~a" a)) - (iseq 0 (- p 1)))))) - (slot-value 'predictor-names)) - -(defmeth regression-model-proto :response-name (&optional (name "Y" set)) - "Message args: (&optional name) -With no argument returns the response name. NAME sets the name." - ;;(declare (ignore self)) - (if set (setf (slot-value 'response-name) (if name (string name) "Y"))) - (slot-value 'response-name)) - -(defmeth regression-model-proto :case-labels (&optional (labels nil set)) -"Message args: (&optional labels) -With no argument returns the case-labels. LABELS sets the labels." - (if set (setf (slot-value 'case-labels) - (if labels - (mapcar #'string labels) - (mapcar #'(lambda (x) (format nil "~d" x)) - (iseq 0 (- (send self :num-cases) 1)))))) - (slot-value 'case-labels)) - -;;; -;;; Other Methods -;;; None of these methods access any slots directly. -;;; - -(defmeth regression-model-proto :num-cases () -"Message args: () -Returns the number of cases in the model." - (length (send self :y))) - -(defmeth regression-model-proto :num-included () -"Message args: () -Returns the number of cases used in the computations." - (sum (if-else (send self :included) 1 0))) - -(defmeth regression-model-proto :num-coefs () -"Message args: () -Returns the number of coefficients in the fit model (including the -intercept if the model includes one)." - (if (send self :intercept) - (+ 1 (length (send self :basis))) - (length (send self :basis)))) - -(defmeth regression-model-proto :df () -"Message args: () -Returns the number of degrees of freedom in the model." - (- (send self :num-included) (send self :num-coefs))) - -(defmeth regression-model-proto :x-matrix () -"Message args: () -Returns the X matrix for the model, including a column of 1's, if -appropriate. Columns of X matrix correspond to entries in basis." - (let ((m (select (send self :x) - (iseq 0 (- (send self :num-cases) 1)) - (send self :basis)))) - (if (send self :intercept) - (bind-columns (repeat 1 (send self :num-cases)) m) - m))) - -(defmeth regression-model-proto :leverages () -"Message args: () -Returns the diagonal elements of the hat matrix." - (let* ((weights (send self :weights)) - (x (send self :x-matrix)) - (raw-levs - (matmult (* (matmult x (send self :xtxinv)) x) - (repeat 1 (send self :num-coefs))))) - (if weights (* weights raw-levs) raw-levs))) - -(defmeth regression-model-proto :fit-values () -"Message args: () -Returns the fitted values for the model." - (matmult (send self :x-matrix) (send self :coef-estimates))) - -(defmeth regression-model-proto :raw-residuals () -"Message args: () -Returns the raw residuals for a model." - (- (send self :y) (send self :fit-values))) - -(defmeth regression-model-proto :residuals () -"Message args: () -Returns the raw residuals for a model without weights. If the model -includes weights the raw residuals times the square roots of the weights -are returned." - (let ((raw-residuals (send self :raw-residuals)) - (weights (send self :weights))) - (if weights (* (sqrt weights) raw-residuals) raw-residuals))) - -(defmeth regression-model-proto :sum-of-squares () -"Message args: () -Returns the error sum of squares for the model." - (send self :residual-sum-of-squares)) - -(defmeth regression-model-proto :sigma-hat () -"Message args: () -Returns the estimated standard deviation of the deviations about the -regression line." - (let ((ss (send self :sum-of-squares)) - (df (send self :df))) - (if (/= df 0) (sqrt (/ ss df))))) - -;; for models without an intercept the 'usual' formula for R^2 can give -;; negative results; hence the max. -(defmeth regression-model-proto :r-squared () -"Message args: () -Returns the sample squared multiple correlation coefficient, R squared, for -the regression." - (max (- 1 (/ (send self :sum-of-squares) (send self :total-sum-of-squares))) - 0)) - -(defmeth regression-model-proto :coef-estimates () -"Message args: () -Returns the OLS (ordinary least squares) estimates of the regression -coefficients. Entries beyond the intercept correspond to entries in basis." - (let ((n (array-dimension (send self :x) 1)) - (indices (if (send self :intercept) - (cons 0 (+ 1 (send self :basis))) - (+ 1 (send self :basis)))) - (m (send self :sweep-matrix))) - (coerce (compound-data-seq (select m (+ 1 n) indices)) 'list))) - -(defmeth regression-model-proto :xtxinv () -"Message args: () -Returns ((X^T) X)^(-1) or ((X^T) W X)^(-1)." - (let ((indices (if (send self :intercept) - (cons 0 (1+ (send self :basis))) - (1+ (send self :basis))))) - (select (send self :sweep-matrix) indices indices))) - -(defmeth regression-model-proto :coef-standard-errors () -"Message args: () -Returns estimated standard errors of coefficients. Entries beyond the -intercept correspond to entries in basis." - (let ((s (send self :sigma-hat))) - (if s (* (send self :sigma-hat) (sqrt (diagonal (send self :xtxinv))))))) - -(defmeth regression-model-proto :studentized-residuals () -"Message args: () -Computes the internally studentized residuals for included cases and externally studentized residuals for excluded cases." - (let ((res (send self :residuals)) - (lev (send self :leverages)) - (sig (send self :sigma-hat)) - (inc (send self :included))) - (if-else inc - (/ res (* sig (sqrt (pmax .00001 (- 1 lev))))) - (/ res (* sig (sqrt (+ 1 lev))))))) - -(defmeth regression-model-proto :externally-studentized-residuals () -"Message args: () -Computes the externally studentized residuals." - (let* ((res (send self :studentized-residuals)) - (df (send self :df))) - (if-else (send self :included) - (* res (sqrt (/ (- df 1) (- df (^ res 2))))) - res))) - -(defmeth regression-model-proto :cooks-distances () -"Message args: () -Computes Cook's distances." - (let ((lev (send self :leverages)) - (res (/ (^ (send self :studentized-residuals) 2) - (send self :num-coefs)))) - (if-else (send self :included) (* res (/ lev (- 1 lev) )) (* res lev)))) - - -(defun plot-points (x y &rest args) - "FIXME!!" - (declare (ignore x y args)) - (error "Graphics not implemented yet.")) - -;; Can not plot points yet!! -(defmeth regression-model-proto :plot-residuals (&optional x-values) -"Message args: (&optional x-values) -Opens a window with a plot of the residuals. If X-VALUES are not supplied -the fitted values are used. The plot can be linked to other plots with the -link-views function. Returns a plot object." - (plot-points (if x-values x-values (send self :fit-values)) - (send self :residuals) - :title "Residual Plot" - :point-labels (send self :case-labels))) - -(defmeth regression-model-proto :plot-bayes-residuals - (&optional x-values) - "Message args: (&optional x-values) -Opens a window with a plot of the standardized residuals and two standard -error bars for the posterior distribution of the actual deviations from the -line. See Chaloner and Brant. If X-VALUES are not supplied the fitted values -are used. The plot can be linked to other plots with the link-views function. -Returns a plot object." - (let* ((r (/ (send self :residuals) (send self :sigma-hat))) - (d (* 2 (sqrt (send self :leverages)))) - (low (- r d)) - (high (+ r d)) - (x-values (if x-values x-values (send self :fit-values))) - (p (plot-points x-values r - :title "Bayes Residual Plot" - :point-labels (send self :case-labels)))) - (map 'list #'(lambda (a b c d) (send p :plotline a b c d nil)) - x-values low x-values high) - (send p :adjust-to-data) - p)) +;;; -*- mode: lisp -*- + +;;; Time-stamp: <2008-10-03 02:07:22 tony> +;;; Creation: <2008-10-03 02:07:10 tony> +;;; Author: AJ Rossini +;;; Copyright: (c)2007, AJ Rossini. BSD, LLGPL, or GPLv2, depending +;;; on how it arrives. +;;; Purpose: redoing regression in a CLOS based framework. See +;;; regression.lsp for basis of work. + +;;; What is this talk of 'release'? Klingons do not make software +;;; 'releases'. Our software 'escapes', leaving a bloody trail of +;;; designers and quality assurance people in its wake. + +;;; This organization and structure is new to the 21st Century +;;; version. + +(in-package :lisp-stat-regression-linear-clos) + +;;; Regresion Model CLOS, data structures + +(defclass regression-model-clos (statistical-model) + ((x :initform nil :initarg :x :accessor x) + (y :initform nil :initarg :y :accessor y) + (included :initform nil :initarg :y :accessor y) + (total-sum-of-squares :initform nil :initarg :y :accessor y) + (residual-sum-of-squares :initform nil :initarg :y :accessor y) + (predictor-names :initform nil :initarg :y :accessor y) + (response-name :initform nil :initarg :y :accessor y) + (case-labels :initform nil :initarg :y :accessor y) + (needs-computing :initform T :initarg :compute? :accessor compute?)) + (:documentation "Normal Linear Regression Model through CLOS. + Historical design based on what was done for LispStat, not modern.")) + +(defclass model-specification () + ((spec-string :initform nil + :initarg :specification + :accessor :specification) + (spec-form :initform nil + :initarg :spec-form + :accessor :spec-form) + (model-class :initform nil)) + (:documentation "container for mathematical structure")) + +(defclass bayesian-model-specification (model-specification) + ((prior-model-class) + (spec-string :initform nil + :initarg :specification + :accessor :specification) + (spec-form :initform nil + :initarg :spec-form + :accessor :spec-form)) + (:documentation "adds structure holding priors to the model")) + +;;; The following should be self-created based on introspection of +;;; available: +;;; ## inferential technologies (bayesian, frequentist, etc), +;;; ## optimization criteria (likelihood, least-squares, min-entropy, +;;; minimax, etc) +;;; ## simplification macros, i.e. mapping directly to linear +;;; regression and other applications. fast specialized +;;; algorithms for edge cases and narrow conditions. +;;; ## + +(defparameter *model-class-list* + '((linear-regression frequentist) + (generalized-linear-regression parametric) + (linear-regression bayesian) + ())) + + + +;;; Regression model generics and methods + +(defgeneric regression-model (model-spec data-pointer &key debug) + (:documentation "CLOSy framework for regression, using numerics from ")) + +(defmethod regression-model + ((regr-inst regression-model-clos) + (data-ptr data-pointer) + &key debug) + "Args: (regr-inst regressino-model-clos) + +Returns a fitted regression model object. To examine the model further +assign the result to a variable and send it messages. Example (data +are in file absorbtion.lsp in the sample data directory/folder): + + (def fit-m (fit (new 'regression-model-clos (list iron aluminum) absorbtion))) + (print fit-m) + (plot fit-m :feature 'residuals) +" + (let ((x (get-variable-matrix (x regr-inst) data-ptr)) + (y (get-variable-vector (y regr-inst) data-ptr))) + + +(defmeth regression-model-proto :compute () + "Message args: () +Recomputes the estimates. For internal use by other messages" + (let* ((included (if-else (send self :included) 1 0)) + (x (send self :x)) + (y (send self :y)) + (intercept (send self :intercept)) + (weights (send self :weights)) + (w (if weights (* included weights) included)) + (m (make-sweep-matrix x y w)) + (n (array-dimension x 1)) + (p (- (array-dimension m 0) 1)) + (tss (aref m p p)) + (tol (* 0.001 (reduce #'* (mapcar #'standard-deviation (column-list x))))) + ;; (tol (* 0.001 (apply #'* (mapcar #'standard-deviation (column-list x))))) + (sweep-result + (if intercept + (sweep-operator m (iseq 1 n) tol) + (sweep-operator m (iseq 0 n) (cons 0.0 tol))))) + (setf (slot-value 'sweep-matrix) (first sweep-result)) + (setf (slot-value 'total-sum-of-squares) tss) + (setf (slot-value 'residual-sum-of-squares) + (aref (first sweep-result) p p)) + (setf (slot-value 'basis) + (let ((b (remove 0 (second sweep-result)))) + (if b (- (reduce #'- (reverse b)) 1) + (error "no columns could be swept")))))) + +)) + + +(defmeth regression-model-proto :display () + "Message args: () +Prints the least squares regression summary. Variables not used in the fit +are marked as aliased." + (let ((coefs (coerce (send self :coef-estimates) 'list)) + (se-s (send self :coef-standard-errors)) + (x (send self :x)) + (p-names (send self :predictor-names))) + (if (send self :weights) + (format t "~%Weighted Least Squares Estimates:~2%") + (format t "~%Least Squares Estimates:~2%")) + (when (send self :intercept) + (format t "Constant ~10f ~A~%" + (car coefs) (list (car se-s))) + (setf coefs (cdr coefs)) + (setf se-s (cdr se-s))) + (dotimes (i (array-dimension x 1)) + (cond + ((member i (send self :basis)) + (format t "~22a ~10f ~A~%" + (select p-names i) (car coefs) (list (car se-s))) + (setf coefs (cdr coefs) se-s (cdr se-s))) + (t (format t "~22a aliased~%" (select p-names i))))) + (format t "~%") + (format t "R Squared: ~10f~%" (send self :r-squared)) + (format t "Sigma hat: ~10f~%" (send self :sigma-hat)) + (format t "Number of cases: ~10d~%" (send self :num-cases)) + (if (/= (send self :num-cases) (send self :num-included)) + (format t "Number of cases used: ~10d~%" (send self :num-included))) + (format t "Degrees of freedom: ~10d~%" (send self :df)) + (format t "~%"))) + +;;; Slot accessors and mutators + + +(defmeth regression-model-proto :included (&optional new-included) + "Message args: (&optional new-included) +With no argument, NIL means a case is not used in calculating estimates, and non-nil means it is used. NEW-INCLUDED is a sequence of length of y of nil and t to select cases. Estimates are recomputed." + (when (and new-included + (= (length new-included) (send self :num-cases))) + (setf (slot-value 'included) (copy-seq new-included)) + (send self :needs-computing t)) + (if (slot-value 'included) + (slot-value 'included) + (repeat t (send self :num-cases)))) + + + +(defmeth regression-model-proto :leverages () + "Message args: () +Returns the diagonal elements of the hat matrix." + (let* ((weights (send self :weights)) + (x (send self :x-matrix)) + (raw-levs + (matmult (* (matmult x (send self :xtxinv)) x) + (repeat 1 (send self :num-coefs))))) + (if weights (* weights raw-levs) raw-levs))) + + +(defmeth regression-model-proto :raw-residuals () + "Message args: () +Returns the raw residuals for a model." + (- (send self :y) (send self :fit-values))) + +(defmeth regression-model-proto :residuals () + "Message args: () +Returns the raw residuals for a model without weights. If the model +includes weights the raw residuals times the square roots of the weights +are returned." + (let ((raw-residuals (send self :raw-residuals)) + (weights (send self :weights))) + (if weights (* (sqrt weights) raw-residuals) raw-residuals))) + + +(defmeth regression-model-proto :sigma-hat () + "Message args: () +Returns the estimated standard deviation of the deviations about the +regression line." + (let ((ss (send self :sum-of-squares)) + (df (send self :df))) + (if (/= df 0) (sqrt (/ ss df))))) + + +(defmeth regression-model-proto :coef-estimates () + "Message args: () +Returns the OLS (ordinary least squares) estimates of the regression +coefficients. Entries beyond the intercept correspond to entries in basis." + (let ((n (array-dimension (send self :x) 1)) + (indices (if (send self :intercept) + (cons 0 (+ 1 (send self :basis))) + (+ 1 (send self :basis)))) + (m (send self :sweep-matrix))) + (coerce (compound-data-seq (select m (+ 1 n) indices)) 'list))) + +(defmeth regression-model-proto :xtxinv () + "Message args: () +Returns ((X^T) X)^(-1) or ((X^T) W X)^(-1)." + (let ((indices (if (send self :intercept) + (cons 0 (1+ (send self :basis))) + (1+ (send self :basis))))) + (select (send self :sweep-matrix) indices indices))) + + +(defmeth regression-model-proto :studentized-residuals () + "Message args: () +Computes the internally studentized residuals for included cases and externally studentized residuals for excluded cases." + (let ((res (send self :residuals)) + (lev (send self :leverages)) + (sig (send self :sigma-hat)) + (inc (send self :included))) + (if-else inc + (/ res (* sig (sqrt (pmax .00001 (- 1 lev))))) + (/ res (* sig (sqrt (+ 1 lev))))))) + +(defmeth regression-model-proto :externally-studentized-residuals () + "Message args: () +Computes the externally studentized residuals." + (let* ((res (send self :studentized-residuals)) + (df (send self :df))) + (if-else (send self :included) + (* res (sqrt (/ (- df 1) (- df (^ res 2))))) + res))) + +(defmeth regression-model-proto :cooks-distances () + "Message args: () +Computes Cook's distances." + (let ((lev (send self :leverages)) + (res (/ (^ (send self :studentized-residuals) 2) + (send self :num-coefs)))) + (if-else (send self :included) (* res (/ lev (- 1 lev) )) (* res lev)))) + + + +(defmeth regression-model-proto :plot-bayes-residuals + (&optional x-values) + "Message args: (&optional x-values) +Opens a window with a plot of the standardized residuals and two standard +error bars for the posterior distribution of the actual deviations from the +line. See Chaloner and Brant. If X-VALUES are not supplied the fitted values +are used. The plot can be linked to other plots with the link-views function. +Returns a plot object." + (let* ((r (/ (send self :residuals) (send self :sigma-hat))) + (d (* 2 (sqrt (send self :leverages)))) + (low (- r d)) + (high (+ r d)) + (x-values (if x-values x-values (send self :fit-values))) + (p (plot-points x-values r + :title "Bayes Residual Plot" + :point-labels (send self :case-labels)))) + (map 'list #'(lambda (a b c d) (send p :plotline a b c d nil)) + x-values low x-values high) + (send p :adjust-to-data) + p)) -- 2.11.4.GIT