From 94cace1ba0bf554d082d72d267b6579c75f91b73 Mon Sep 17 00:00:00 2001 From: AJ Rossini Date: Wed, 29 Apr 2009 08:15:28 +0200 Subject: [PATCH] migrated working stuff into ls-demo from TODO. Signed-off-by: AJ Rossini --- TODO.lisp | 324 +---------------------------------------------------------- ls-demo.lisp | 258 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 262 insertions(+), 320 deletions(-) diff --git a/TODO.lisp b/TODO.lisp index 4c6cef3..feec3a1 100644 --- a/TODO.lisp +++ b/TODO.lisp @@ -1,6 +1,6 @@ ;;; -*- mode: lisp -*- -;;; Time-stamp: <2009-04-20 19:01:23 tony> +;;; Time-stamp: <2009-04-28 13:10:21 tony> ;;; Creation: <2008-09-08 08:06:30 tony> ;;; File: TODO.lisp ;;; Author: AJ Rossini @@ -43,66 +43,8 @@ (in-package :ls-user) -#+nil -(progn ;; FIXME: Need to clean up data examples, licenses, attributions, etc. - ;; The following breaks because we should use a package to hold - ;; configuration details, and this would be the only package outside - ;; of packages.lisp, as it holds the overall defsystem structure. - (load-data "iris.lsp") ;; (the above partially fixed). - (variables) - diabetes ) - - -(progn ;; Importing data from DSV text files. - - (defparameter *my-df-2* - (make-instance 'dataframe-array - :storage - (listoflist->array - (cybertiggyr-dsv::load-escaped - "/media/disk/Desktop/sandbox/CLS.git/Data/example-mixed.csv")) - :doc "This is an interesting dataframe-array")) -#| :case-labels (list "x" "y") - :var-labels (list "a" "b" "c" "d" "e") -|# - - ;; a better approach is: - (asdf:oos 'asdf:load-op 'rsm-string) - (rsm.string:file->string-table - "/media/disk/Desktop/sandbox/CLS.git/Data/example-mixed.csv") - - (rsm.string:file->number-table - "/media/disk/Desktop/sandbox/CLS.git/Data/example-numeric.csv") - - (defparameter *my-df-2* - (make-instance 'dataframe-array - :storage - (listoflist->array - (transpose-listoflist - (rsm.string:file->string-table - "/media/disk/Desktop/sandbox/CLS.git/Data/example-mixed.csv"))) - :doc "This is an interesting dataframe-array")) - *my-df-2* - - (defparameter *my-df-3* - (make-instance 'dataframe-array - :storage - (listoflist->array - (transpose-listoflist - (rsm.string:file->number-table - "/media/disk/Desktop/sandbox/CLS.git/Data/example-numeric.csv"))) - :doc "This is an interesting dataframe-array")) - *my-df-3* - - ;; Need to the this using the make-dataframe example, or write a - ;; dsvfile->dataframe command. - ) - - - - (progn - ;; Plotting + ;; Plotting -- need to figure out the core-dump, or change libraries. ;; (asdf:oos 'asdf:load-op 'cl-plplot) (plot-ex) @@ -116,9 +58,7 @@ (contour-plot-ex) (fn-contour-plot-ex) (shade-plot-ex) - (3D-plot-ex) - - ) + (3D-plot-ex)) (progn @@ -157,8 +97,8 @@ (asdf:oos 'asdf:load-op 'gsll-tests) ;; the following should be equivalent - (setf *t1* (LIST 6.18d0 6.647777777777779d0 6.18d0)) - (setf *t2* (MULTIPLE-VALUE-LIST + (defparameter *t1* (LIST 6.18d0 6.647777777777779d0 6.18d0)) + (defparameter *t2* (MULTIPLE-VALUE-LIST (LET ((VEC (gsll:make-marray 'DOUBLE-FLOAT :INITIAL-CONTENTS '(-3.21d0 1.0d0 12.8d0))) @@ -303,257 +243,3 @@ :partially-parametric)) "start of ontology")) - -;;;; LM - -(progn - - (defparameter *y* - (make-vector - 8 - :type :row - :initial-contents '((1d0 2d0 3d0 4d0 5d0 6d0 7d0 8d0)))) - - - (defparameter *xv+1* - (make-matrix - 8 2 - :initial-contents '((1d0 1d0) - (1d0 3d0) - (1d0 2d0) - (1d0 4d0) - (1d0 3d0) - (1d0 5d0) - (1d0 4d0) - (1d0 6d0)))) - - - ;; so something like (NOTE: matrices are transposed to begin with, hence the incongruety) - (defparameter *xtx-2* (m* (transpose *xv+1*) *xv+1*)) - ;; # - - (defparameter *xty-2* (m* (transpose *xv+1*) (transpose *y*))) - ;; # - - (defparameter *rcond-2* 0.000001) - (defparameter *betahat-2* (gelsy *xtx-2* *xty-2* *rcond-2*)) - ;; *xtx-2* => "details of complete orthogonal factorization" - ;; according to man page: - ;; # - - ;; *xty-2* => output becomes solution: - ;; # - - *betahat-2* ; which matches R, see below - - (documentation 'gelsy 'function) - - -;; (# -;; 2) - -;; ## Test case in R: -;; x <- c( 1.0, 3.0, 2.0, 4.0, 3.0, 5.0, 4.0, 6.0) -;; y <- c( 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0) -;; lm(y~x) -;; ## => Call: lm(formula = y ~ x) - -;; Coefficients: (Intercept) x -;; -0.1667 1.3333 - -;; summary(lm(y~x)) -;; ## => - -;; Call: -;; lm(formula = y ~ x) - -;; Residuals: -;; Min 1Q Median 3Q Max -;; -1.833e+00 -6.667e-01 -3.886e-16 6.667e-01 1.833e+00 - -;; Coefficients: -;; Estimate Std. Error t value Pr(>|t|) -;; (Intercept) -0.1667 1.1587 -0.144 0.89034 -;; x 1.3333 0.3043 4.382 0.00466 ** -;; --- -;; Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 - -;; Residual standard error: 1.291 on 6 degrees of freedom -;; Multiple R-squared: 0.7619, Adjusted R-squared: 0.7222 -;; F-statistic: 19.2 on 1 and 6 DF, p-value: 0.004659 - - - - ;; which suggests one might do (modulo ensuring correct - ;; orientations). When this is finalized, it should migrate to - ;; CLS. - ;; - - - (defparameter *n* 20) ; # rows = # obsns - (defparameter *p* 10) ; # cols = # vars - (defparameter *x-temp* (rand *n* *p*)) - (defparameter *b-temp* (rand *p* 1)) - (defparameter *y-temp* (m* *x-temp* *b-temp*)) - ;; so Y=Xb + \eps - (defparameter *rcond* (* (coerce (expt 2 -52) 'double-float) - (max (nrows *x-temp*) (ncols *y-temp*)))) - (defparameter *orig-x* (copy *x-temp*)) - (defparameter *orig-b* (copy *b-temp*)) - (defparameter *orig-y* (copy *y-temp*)) - - (defparameter *lm-result* (lm *x-temp* *y-temp*)) - (princ (first *lm-result*)) - (princ (second *lm-result*)) - (princ (third *lm-result*)) - (v= (third *lm-result*) - (v- (first (first *lm-result*)) - (first (second *lm-result*)))) - - - - - ;; Some issues exist in the LAPACK vs. LINPACK variants, hence R - ;; uses LINPACK primarily, rather than LAPACK. See comments in R - ;; source for issues. - - - ;; Goal is to start from X, Y and then realize that if - ;; Y = X \beta, then, i.e. 8x1 = 8xp px1 + 8x1 - ;; XtX \hat\beta = Xt Y - ;; so that we can solve the equation W \beta = Z where W and Z - ;; are known, to estimate \beta. - - ;; the above is known to be numerically instable -- some processing - ;; of X is preferred and should be done prior. And most of the - ;; transformation-based work does precisely that. - - ;; recall: Var[Y] = E[(Y - E[Y])(Y-E[Y])t] - ;; = E[Y Yt] - 2 \mu \mut + \mu \mut - ;; = E[Y Yt] - \mu \mut - - ;; Var Y = E[Y^2] - \mu^2 - - - ;; For initial estimates of covariance of \hat\beta: - - ;; \hat\beta = (Xt X)^-1 Xt Y - ;; with E[ \hat\beta ] - ;; = E[ (Xt X)^-1 Xt Y ] - ;; = E[(Xt X)^-1 Xt (X\beta)] - ;; = \beta - ;; - ;; So Var[\hat\beta] = ... - ;; (Xt X) - ;; and this gives SE(\beta_i) = (* (sqrt (mref Var i i)) adjustment) - - - ;; from docs: - - (setf *temp-result* - (let ((*default-implementation* :foreign-array)) - (let* ((m 10) - (n 10) - (a (rand m n)) - (x (rand n 1)) - (b (m* a x)) - (rcond (* (coerce (expt 2 -52) 'double-float) - (max (nrows a) (ncols a)))) - (orig-a (copy a)) - (orig-b (copy b)) - (orig-x (copy x))) - (list x (gelsy a b rcond)) - ;; no applicable conversion? - ;; (m- (# - ;; 1) - - ;; ## Test case in R: - ;; x <- c( 1.0, 3.0, 2.0, 4.0, 3.0, 5.0, 4.0, 6.0) - ;; y <- c( 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0) - ;; lm(y~x-1) - ;; ## => - ;; Call: - ;; lm(formula = y ~ x - 1) - - ;; Coefficients: - ;; x - ;; 1.293 - - (first *betahat*)) - - - -#| - (type-of #2A((1 2 3 4 5) - (10 20 30 40 50))) - - (type-of (rand 10 20)) - - (typep #2A((1 2 3 4 5) - (10 20 30 40 50)) - 'matrix-like) - - (typep (rand 10 20) 'matrix-like) - - (typep #2A((1 2 3 4 5) - (10 20 30 40 50)) - 'array) - - (typep (rand 10 20) 'array) -|# diff --git a/ls-demo.lisp b/ls-demo.lisp index 53fd3e5..7499a49 100644 --- a/ls-demo.lisp +++ b/ls-demo.lisp @@ -3,7 +3,7 @@ ;;; See COPYRIGHT file for any additional restrictions (BSD license). ;;; Since 1991, ANSI was finally finished. Edited for ANSI Common Lisp. -;;; Time-stamp: <2009-04-17 13:32:06 tony> +;;; Time-stamp: <2009-04-24 15:36:52 tony> ;;; Creation: sometime in 2006... ;;; File: ls-demo.lisp ;;; Author: AJ Rossini @@ -855,3 +855,259 @@ my.lib (m= *xv+1a* *xv+1b*) ; => T (princ "Data Set up")) + + + +;;;; LM + +(progn + + (defparameter *y* + (make-vector + 8 + :type :row + :initial-contents '((1d0 2d0 3d0 4d0 5d0 6d0 7d0 8d0)))) + + + (defparameter *xv+1* + (make-matrix + 8 2 + :initial-contents '((1d0 1d0) + (1d0 3d0) + (1d0 2d0) + (1d0 4d0) + (1d0 3d0) + (1d0 5d0) + (1d0 4d0) + (1d0 6d0)))) + + + ;; so something like (NOTE: matrices are transposed to begin with, hence the incongruety) + (defparameter *xtx-2* (m* (transpose *xv+1*) *xv+1*)) + ;; # + + (defparameter *xty-2* (m* (transpose *xv+1*) (transpose *y*))) + ;; # + + (defparameter *rcond-2* 0.000001) + (defparameter *betahat-2* (gelsy *xtx-2* *xty-2* *rcond-2*)) + ;; *xtx-2* => "details of complete orthogonal factorization" + ;; according to man page: + ;; # + + ;; *xty-2* => output becomes solution: + ;; # + + *betahat-2* ; which matches R, see below + + (documentation 'gelsy 'function) + + +;; (# +;; 2) + +;; ## Test case in R: +;; x <- c( 1.0, 3.0, 2.0, 4.0, 3.0, 5.0, 4.0, 6.0) +;; y <- c( 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0) +;; lm(y~x) +;; ## => Call: lm(formula = y ~ x) + +;; Coefficients: (Intercept) x +;; -0.1667 1.3333 + +;; summary(lm(y~x)) +;; ## => + +;; Call: +;; lm(formula = y ~ x) + +;; Residuals: +;; Min 1Q Median 3Q Max +;; -1.833e+00 -6.667e-01 -3.886e-16 6.667e-01 1.833e+00 + +;; Coefficients: +;; Estimate Std. Error t value Pr(>|t|) +;; (Intercept) -0.1667 1.1587 -0.144 0.89034 +;; x 1.3333 0.3043 4.382 0.00466 ** +;; --- +;; Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 + +;; Residual standard error: 1.291 on 6 degrees of freedom +;; Multiple R-squared: 0.7619, Adjusted R-squared: 0.7222 +;; F-statistic: 19.2 on 1 and 6 DF, p-value: 0.004659 + + + + ;; which suggests one might do (modulo ensuring correct + ;; orientations). When this is finalized, it should migrate to + ;; CLS. + ;; + + + (defparameter *n* 20) ; # rows = # obsns + (defparameter *p* 10) ; # cols = # vars + (defparameter *x-temp* (rand *n* *p*)) + (defparameter *b-temp* (rand *p* 1)) + (defparameter *y-temp* (m* *x-temp* *b-temp*)) + ;; so Y=Xb + \eps + (defparameter *rcond* (* (coerce (expt 2 -52) 'double-float) + (max (nrows *x-temp*) (ncols *y-temp*)))) + (defparameter *orig-x* (copy *x-temp*)) + (defparameter *orig-b* (copy *b-temp*)) + (defparameter *orig-y* (copy *y-temp*)) + + (defparameter *lm-result* (lm *x-temp* *y-temp*)) + (princ (first *lm-result*)) + (princ (second *lm-result*)) + (princ (third *lm-result*)) + (v= (third *lm-result*) + (v- (first (first *lm-result*)) + (first (second *lm-result*)))) + + + + + ;; Some issues exist in the LAPACK vs. LINPACK variants, hence R + ;; uses LINPACK primarily, rather than LAPACK. See comments in R + ;; source for issues. + + + ;; Goal is to start from X, Y and then realize that if + ;; Y = X \beta, then, i.e. 8x1 = 8xp px1 + 8x1 + ;; XtX \hat\beta = Xt Y + ;; so that we can solve the equation W \beta = Z where W and Z + ;; are known, to estimate \beta. + + ;; the above is known to be numerically instable -- some processing + ;; of X is preferred and should be done prior. And most of the + ;; transformation-based work does precisely that. + + ;; recall: Var[Y] = E[(Y - E[Y])(Y-E[Y])t] + ;; = E[Y Yt] - 2 \mu \mut + \mu \mut + ;; = E[Y Yt] - \mu \mut + + ;; Var Y = E[Y^2] - \mu^2 + + + ;; For initial estimates of covariance of \hat\beta: + + ;; \hat\beta = (Xt X)^-1 Xt Y + ;; with E[ \hat\beta ] + ;; = E[ (Xt X)^-1 Xt Y ] + ;; = E[(Xt X)^-1 Xt (X\beta)] + ;; = \beta + ;; + ;; So Var[\hat\beta] = ... + ;; (Xt X) + ;; and this gives SE(\beta_i) = (* (sqrt (mref Var i i)) adjustment) + + + ;; from docs: + + (setf *temp-result* + (let ((*default-implementation* :foreign-array)) + (let* ((m 10) + (n 10) + (a (rand m n)) + (x (rand n 1)) + (b (m* a x)) + (rcond (* (coerce (expt 2 -52) 'double-float) + (max (nrows a) (ncols a)))) + (orig-a (copy a)) + (orig-b (copy b)) + (orig-x (copy x))) + (list x (gelsy a b rcond)) + ;; no applicable conversion? + ;; (m- (# + ;; 1) + + ;; ## Test case in R: + ;; x <- c( 1.0, 3.0, 2.0, 4.0, 3.0, 5.0, 4.0, 6.0) + ;; y <- c( 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0) + ;; lm(y~x-1) + ;; ## => + ;; Call: + ;; lm(formula = y ~ x - 1) + + ;; Coefficients: + ;; x + ;; 1.293 + + (first *betahat*)) + + + +#| + (type-of #2A((1 2 3 4 5) + (10 20 30 40 50))) + + (type-of (rand 10 20)) + + (typep #2A((1 2 3 4 5) + (10 20 30 40 50)) + 'matrix-like) + + (typep (rand 10 20) 'matrix-like) + + (typep #2A((1 2 3 4 5) + (10 20 30 40 50)) + 'array) + + (typep (rand 10 20) 'array) +|# -- 2.11.4.GIT