3 ;;; Time-stamp: <2009-04-17 18:25:00 tony>
4 ;;; Creation: <2008-09-08 08:06:30 tony>
6 ;;; Author: AJ Rossini <blindglobe@gmail.com>
7 ;;; Copyright: (c) 2007-2008, AJ Rossini <blindglobe@gmail.com>. BSD.
8 ;;; Purpose: Stuff that needs to be made working sits inside the
9 ;;; progns... This file contains the current challenges to
10 ;;; solve, including a description of the setup and the work
13 ;;; What is this talk of 'release'? Klingons do not make software
14 ;;; 'releases'. Our software 'escapes', leaving a bloody trail of
15 ;;; designers and quality assurance people in its wake.
20 ;;(asdf:oos 'asdf:load-op 'lisp-matrix)
21 ;;(asdf:oos 'asdf:compile-op 'lispstat)
22 ;;(asdf:oos 'asdf:load-op 'lispstat)
24 (in-package :lisp-stat-unittests
)
26 ;; tests = 80, failures = 8, errors = 15
27 (run-tests :suite
'lisp-stat-ut
)
28 (describe (run-tests :suite
'lisp-stat-ut
))
30 ;; FIXME: Example: currently not relevant, yet
31 ;; (describe (lift::run-test :test-case 'lisp-stat-unittests::create-proto
32 ;; :suite 'lisp-stat-unittests::lisp-stat-ut-proto))
34 (describe (lift::run-tests
:suite
'lisp-stat-ut-dataframe
))
35 (lift::run-tests
:suite
'lisp-stat-ut-dataframe
)
39 :test-case
'lisp-stat-unittests
::create-proto
40 :suite
'lisp-stat-unittests
::lisp-stat-ut-proto
))
42 (describe 'lisp-stat-ut
)
46 (progn ;; FIXME: Regression modeling (some data future-ish).
49 ;; - confirm estimates for multivariate case,
50 ;; - pretty-print output
51 ;; - fix up API -- what do we want this to look like?
54 (regression-model (list->vector-like iron
) ;; BROKEN
55 (list->vector-like absorbtion
))
65 (covariance-matrix *m-fit
*)
68 (regression-model (transpose (listoflist->matrix-like
(list iron aluminum
)
69 :orientation
:row-major
))
70 (list->vector-like absorbtion
) ))
72 (defparameter *m3-fit
*
75 ;; Should the above look something like:
76 (defparameter *m3-fit
*
77 (spec-and-fit-model '(absorbtion = iron aluminum
)))
78 ;; in which case we split the list before/after the "=" character.
82 (covariance-matrix *m3-fit
*))
86 (progn ;; FIXME: Need to clean up data examples, licenses, attributions, etc.
87 ;; The following breaks because we should use a package to hold
88 ;; configuration details, and this would be the only package outside
89 ;; of packages.lisp, as it holds the overall defsystem structure.
90 (load-data "iris.lsp") ;; (the above partially fixed).
96 (progn ;; Importing data from DSV text files.
98 (defparameter *my-df-2
*
99 (make-instance 'dataframe-array
102 (cybertiggyr-dsv::load-escaped
103 "/media/disk/Desktop/sandbox/CLS.git/Data/example-mixed.csv"))
104 :doc
"This is an interesting dataframe-array"))
105 #|
:case-labels
(list "x" "y")
106 :var-labels
(list "a" "b" "c" "d" "e")
109 ;; a better approach is:
110 (asdf:oos
'asdf
:load-op
'rsm-string
)
111 (rsm.string
:file-
>string-table
112 "/media/disk/Desktop/sandbox/CLS.git/Data/example-mixed.csv")
114 (rsm.string
:file-
>number-table
115 "/media/disk/Desktop/sandbox/CLS.git/Data/example-numeric.csv")
117 (defparameter *my-df-2
*
118 (make-instance 'dataframe-array
121 (transpose-listoflist
122 (rsm.string
:file-
>string-table
123 "/media/disk/Desktop/sandbox/CLS.git/Data/example-mixed.csv")))
124 :doc
"This is an interesting dataframe-array"))
127 (defparameter *my-df-3
*
128 (make-instance 'dataframe-array
131 (transpose-listoflist
132 (rsm.string
:file-
>number-table
133 "/media/disk/Desktop/sandbox/CLS.git/Data/example-numeric.csv")))
134 :doc
"This is an interesting dataframe-array"))
144 (asdf:oos
'asdf
:load-op
'cl-plplot
)
147 (defparameter *gdev
* "xcairo")
148 ;; (defparameter *gdev* "xwin")
149 (cl-plplot::plsdev
*gdev
*)
159 (in-package :ls-user
)
163 ;; REVIEW: general Lisp use guidance
165 (fdefinition 'make-matrix
)
166 (documentation 'make-matrix
'function
)
168 #| Examples from CLHS
, a bit of guidance.
170 ;; This function assumes its callers have checked the types of the
171 ;; arguments, and authorizes the compiler to build in that assumption.
172 (defun discriminant (a b c
)
173 (declare (number a b c
))
174 "Compute the discriminant for a quadratic equation."
175 (- (* b b
) (* 4 a c
))) => DISCRIMINANT
176 (discriminant 1 2/3 -
2) => 76/9
178 ;; This function assumes its callers have not checked the types of the
179 ;; arguments, and performs explicit type checks before making any assumptions.
180 (defun careful-discriminant (a b c
)
181 "Compute the discriminant for a quadratic equation."
182 (check-type a number
)
183 (check-type b number
)
184 (check-type c number
)
185 (locally (declare (number a b c
))
186 (- (* b b
) (* 4 a c
)))) => CAREFUL-DISCRIMINANT
187 (careful-discriminant 1 2/3 -
2) => 76/9
193 (progn ;; experiments with GSL and the Lisp interface.
194 (asdf:oos
'asdf
:load-op
'gsll
)
195 (asdf:oos
'asdf
:load-op
'gsll-tests
)
197 ;; the following should be equivalent
198 (setf *t1
* (LIST 6.18d0
6.647777777777779d0
6.18d0
))
199 (setf *t2
* (MULTIPLE-VALUE-LIST
201 (gsll:make-marray
'DOUBLE-FLOAT
202 :INITIAL-CONTENTS
'(-3.21d0
1.0d0
12.8d0
)))
204 (gsll:MAKE-MARRAY
'DOUBLE-FLOAT
205 :INITIAL-CONTENTS
'(3.0d0
1.0d0
2.0d0
))))
206 (LET ((MEAN (gsll:MEAN VEC
)))
207 (LIST (gsll:ABSOLUTE-DEVIATION VEC
)
208 (gsll:WEIGHTED-ABSOLUTE-DEVIATION VEC WEIGHTS
)
209 (gsll:ABSOLUTE-DEVIATION VEC MEAN
))))))
212 ;; from (gsll:examples 'gsll::numerical-integration) ...
213 (gsll:integration-qng gsll
::one-sine
0.0d0 PI
)
215 (gsll:defun-single axpb
(x) (+ (* 2 x
) 3)) ;; a<-2, b<-3
216 (gsll:integration-qng axpb
1d0
2d0
)
220 (defun-single axpb2
(x) (+ (* a x
) b
)))
221 (gsll:integration-qng axpb2
1d0
2d0
)
224 ;; (gsll:integration-qng
227 ;; (defun-single axpb2 (x) (+ (* a x) b)))
230 ;; right, but weird expansion...
231 (gsll:integration-qng
234 (defun axpb2 (x) (+ (* a x
) b
))
235 (gsll:def-single-function axpb2
)
239 ;; Linear least squares
241 (gsll:gsl-lookup
"gsl_linalg_LU_decomp") ; => gsll:lu-decomposition
242 (gsll:gsl-lookup
"gsl_linalg_LU_solve") ; => gsll:lu-solve
248 (progn ;; philosophy time
250 (setf my-model
(model :name
"ex1"
251 :data-slots
(list w x y z
)
252 :param-slots
(list alpha beta gamma
)
253 :math-form
(regression-model :formula
'(= w
(+ (* beta x
)
257 :centrality
'median
; 'mean
264 (setf my-dataset
(statistical-table :table data-frame-contents
265 :metadata
(list (:case-names
(list ))
267 (:documentation
"string of doc"))))
269 (setf my-analysis
(analysis
272 :parameter-map
(pairing (model-param-slots my-model
)
273 (data-var-names my-dataset
))))
275 ;; ontological implications -- the analysis is an abstract class of
276 ;; data, model, and mapping between the model and data. The fit is
277 ;; the instantiation of such. This provides a statistical object
278 ;; computation theory which can be realized as "executable
279 ;; statistics" or "computable statistics".
280 (setf my-analysis
(analyze my-fit
281 :estimation-method
'linear-least-squares-regression
))
283 ;; one of the tricks here is that one needs to provide the structure
284 ;; from which to consider estimation, and more importantly, the
285 ;; validity of the estimation.
288 (setf linear-least-squares-regression
289 (estimation-method-definition
290 :variable-defintions
((list
291 ;; from MachLearn: supervised,
293 :data-response-vars list-drv
; nil if unsup
296 :data-predictor-vars list-dpv
297 ;; nil in this case. these
298 ;; describe "out-of-box" specs
299 :hyper-vars list-hv
))
300 :form
'(regression-additive-error
301 :central-form
(linear-form drv pv dpv
)
302 :error-form
'normal-error
)
303 :resulting-decision
'(point-estimation interval-estimation
)
304 :philosophy
'frequentist
305 :documentation
"use least squares to fit a linear regression
308 (defparameter *statistical-philosophies
*
309 '(frequentist bayesian fiducial decision-analysis
)
310 "can be combined to build decision-making approaches and
313 (defparameter *decisions
*
314 '(estimation selection testing
)
315 "possible results from a...")
316 ;; is this really true? One can embedded hypothesis testing within
317 ;; estimation, as the hypothesis estimated to select. And
318 ;; categorical/continuous rear their ugly heads, but not really in
321 (defparameter *ontology-of-decision-procedures
*
325 (list :maximum-likelihood
330 (list :maximum-likelihood
336 :bioequivalence-inversion
)
341 :partially-parametric
))
342 "start of ontology"))
353 :initial-contents
'((1d0 2d0
3d0
4d0
5d0
6d0
7d0
8d0
))))
359 :initial-contents
'((1d0 1d0
)
369 ;; so something like (NOTE: matrices are transposed to begin with, hence the incongruety)
370 (defparameter *xtx-2
* (m* (transpose *xv
+1*) *xv
+1*))
371 ;; #<LA-SIMPLE-MATRIX-DOUBLE 2 x 2
375 (defparameter *xty-2
* (m* (transpose *xv
+1*) (transpose *y
*)))
376 ;; #<LA-SIMPLE-VECTOR-DOUBLE (2 x 1)
380 (defparameter *rcond-2
* 0.000001)
381 (defparameter *betahat-2
* (gelsy *xtx-2
* *xty-2
* *rcond-2
*))
382 ;; *xtx-2* => "details of complete orthogonal factorization"
383 ;; according to man page:
384 ;; #<LA-SIMPLE-MATRIX-DOUBLE 2 x 2
385 ;; -119.33147112141039d0 -29.095426104883202d0
386 ;; 0.7873402682880205d0 -1.20672274167718d0>
388 ;; *xty-2* => output becomes solution:
389 ;; #<LA-SIMPLE-VECTOR-DOUBLE (2 x 1)
390 ;; -0.16666666666668312d0
391 ;; 1.333333333333337d0>
393 *betahat-2
* ; which matches R, see below
395 (documentation 'gelsy
'function
)
398 ;; (#<LA-SIMPLE-VECTOR-DOUBLE (2 x 1)
399 ;; -0.16666666666668312 1.333333333333337>
402 ;; ## Test case in R:
403 ;; x <- c( 1.0, 3.0, 2.0, 4.0, 3.0, 5.0, 4.0, 6.0)
404 ;; y <- c( 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0)
406 ;; ## => Call: lm(formula = y ~ x)
408 ;; Coefficients: (Intercept) x
415 ;; lm(formula = y ~ x)
418 ;; Min 1Q Median 3Q Max
419 ;; -1.833e+00 -6.667e-01 -3.886e-16 6.667e-01 1.833e+00
422 ;; Estimate Std. Error t value Pr(>|t|)
423 ;; (Intercept) -0.1667 1.1587 -0.144 0.89034
424 ;; x 1.3333 0.3043 4.382 0.00466 **
426 ;; Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
428 ;; Residual standard error: 1.291 on 6 degrees of freedom
429 ;; Multiple R-squared: 0.7619, Adjusted R-squared: 0.7222
430 ;; F-statistic: 19.2 on 1 and 6 DF, p-value: 0.004659
434 ;; which suggests one might do (modulo ensuring correct
435 ;; orientations). When this is finalized, it should migrate to
440 (defparameter *n
* 20) ; # rows = # obsns
441 (defparameter *p
* 10) ; # cols = # vars
442 (defparameter *x-temp
* (rand *n
* *p
*))
443 (defparameter *b-temp
* (rand *p
* 1))
444 (defparameter *y-temp
* (m* *x-temp
* *b-temp
*))
446 (defparameter *rcond
* (* (coerce (expt 2 -
52) 'double-float
)
447 (max (nrows *x-temp
*) (ncols *y-temp
*))))
448 (defparameter *orig-x
* (copy *x-temp
*))
449 (defparameter *orig-b
* (copy *b-temp
*))
450 (defparameter *orig-y
* (copy *y-temp
*))
452 (defparameter *lm-result
* (lm *x-temp
* *y-temp
*))
453 (princ (first *lm-result
*))
454 (princ (second *lm-result
*))
455 (princ (third *lm-result
*))
456 (v= (third *lm-result
*)
457 (v- (first (first *lm-result
*))
458 (first (second *lm-result
*))))
463 ;; Some issues exist in the LAPACK vs. LINPACK variants, hence R
464 ;; uses LINPACK primarily, rather than LAPACK. See comments in R
465 ;; source for issues.
468 ;; Goal is to start from X, Y and then realize that if
469 ;; Y = X \beta, then, i.e. 8x1 = 8xp px1 + 8x1
470 ;; XtX \hat\beta = Xt Y
471 ;; so that we can solve the equation W \beta = Z where W and Z
472 ;; are known, to estimate \beta.
474 ;; the above is known to be numerically instable -- some processing
475 ;; of X is preferred and should be done prior. And most of the
476 ;; transformation-based work does precisely that.
478 ;; recall: Var[Y] = E[(Y - E[Y])(Y-E[Y])t]
479 ;; = E[Y Yt] - 2 \mu \mut + \mu \mut
480 ;; = E[Y Yt] - \mu \mut
482 ;; Var Y = E[Y^2] - \mu^2
485 ;; For initial estimates of covariance of \hat\beta:
487 ;; \hat\beta = (Xt X)^-1 Xt Y
488 ;; with E[ \hat\beta ]
489 ;; = E[ (Xt X)^-1 Xt Y ]
490 ;; = E[(Xt X)^-1 Xt (X\beta)]
493 ;; So Var[\hat\beta] = ...
495 ;; and this gives SE(\beta_i) = (* (sqrt (mref Var i i)) adjustment)
501 (let ((*default-implementation
* :foreign-array
))
507 (rcond (* (coerce (expt 2 -
52) 'double-float
)
508 (max (nrows a
) (ncols a
))))
512 (list x
(gelsy a b rcond
))
513 ;; no applicable conversion?
514 ;; (m- (#<FA-SIMPLE-VECTOR-DOUBLE (10 x 1))
515 ;; (#<FA-SIMPLE-VECTOR-DOUBLE (10 x 1)) )
516 (v- x
(first (gelsy a b rcond
))))))
519 (princ *temp-result
*)
522 (let ((*default-implementation
* :lisp-array
))
528 (rcond (* (coerce (expt 2 -
52) 'double-float
)
529 (max (nrows a
) (ncols a
))))
533 (list x
(gelsy a b rcond
))
534 (m- x
(first (gelsy a b rcond
)))
536 (princ *temp-result
*)
542 :type
:row
;; default, not usually needed!
543 :initial-contents
'((1d0 3d0
2d0
4d0
3d0
5d0
4d0
6d0
))))
549 :initial-contents
'((1d0 2d0
3d0
4d0
5d0
6d0
7d0
8d0
))))
551 ;; so something like (NOTE: matrices are transposed to begin with, hence the incongruety)
552 (defparameter *xtx-1
* (m* *xv
* (transpose *xv
*)))
553 (defparameter *xty-1
* (m* *xv
* (transpose *y
*)))
554 (defparameter *rcond-in
* (* (coerce (expt 2 -
52) 'double-float
)
558 (defparameter *betahat
* (gelsy *xtx-1
* *xty-1
* *rcond-in
*))
560 ;; (#<LA-SIMPLE-VECTOR-DOUBLE (1 x 1)
561 ;; 1.293103448275862>
564 ;; ## Test case in R:
565 ;; x <- c( 1.0, 3.0, 2.0, 4.0, 3.0, 5.0, 4.0, 6.0)
566 ;; y <- c( 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0)
570 ;; lm(formula = y ~ x - 1)
581 (type-of #2A
((1 2 3 4 5)
584 (type-of (rand 10 20))
586 (typep #2A
((1 2 3 4 5)
590 (typep (rand 10 20) 'matrix-like
)
592 (typep #2A
((1 2 3 4 5)
596 (typep (rand 10 20) 'array
)