3 ;;; Time-stamp: <2009-02-16 17:33:06 tony>
4 ;;; Creation: <2008-09-08 08:06:30 tony>
6 ;;; Author: AJ Rossini <blindglobe@gmail.com>
7 ;;; Copyright: (c) 2007-2008, AJ Rossini <blindglobe@gmail.com>. BSD.
8 ;;; Purpose: Stuff that needs to be made working sits inside the progns...
10 ;;; What is this talk of 'release'? Klingons do not make software
11 ;;; 'releases'. Our software 'escapes', leaving a bloody trail of
12 ;;; designers and quality assurance people in its wake.
14 ;;; This file contains the current challenges to solve, including a
15 ;;; description of the setup and the work to solve....
20 ;;(asdf:oos 'asdf:compile-op 'lispstat)
21 ;;(asdf:oos 'asdf:load-op 'lispstat)
23 (in-package :lisp-stat-unittests
)
25 ;; tests = 54, failures = 7, errors = 3
27 (describe (run-tests :suite
'lisp-stat-ut
))
28 (run-tests :suite
'lisp-stat-ut
)
31 ;; FIXME: Example: currently not relevant, yet
34 :test-case
'lisp-stat-unittests
::create-proto
35 :suite
'lisp-stat-unittests
::lisp-stat-ut-proto
))
43 ;; Making data-frames (i.e. cases (rows) by variables (columns))
44 ;; takes a bit of getting used to. For this, it is important to
45 ;; realize that we can do the following:
46 ;; #1 - consider the possibility of having a row, and transposing
47 ;; it, so the list-of-lists is: ((1 2 3 4 5)) (1 row, 5 columns)
48 ;; #2 - naturally list-of-lists: ((1)(2)(3)(4)(5)) (5 rows, 1 column)
49 ;; see src/data/listoflist.lisp for code to process this particular
51 (defparameter *indep-vars-1-matrix
*
52 (transpose (make-matrix 1 (length iron
)
54 (list (mapcar #'(lambda (x) (coerce x
'double-float
))
56 "creating iron into double float, straightforward")
58 (documentation '*indep-vars-1-matrix
* 'variable
)
59 ;; *indep-vars-1-matrix*
62 (defparameter *indep-vars-1a-matrix
*
63 (make-matrix (length iron
) 1
65 (mapcar #'(lambda (x) (list (coerce x
'double-float
)))
67 ;; *indep-vars-1a-matrix*
69 ;; and mathematically, they seem equal:
70 (m= *indep-vars-1-matrix
* *indep-vars-1a-matrix
*) ; => T
71 ;; but of course not completely...
72 (eql *indep-vars-1-matrix
* *indep-vars-1a-matrix
*) ; => NIL
73 (eq *indep-vars-1-matrix
* *indep-vars-1a-matrix
*) ; => NIL
76 (print *indep-vars-1-matrix
*)
77 (print *indep-vars-1a-matrix
*)
79 (documentation 'lisp-matrix
:bind2
'function
) ; by which we mean:
80 (documentation 'bind2
'function
)
81 (bind2 *indep-vars-1-matrix
* *indep-vars-1a-matrix
* :by
:column
) ; 2 col
82 (bind2 *indep-vars-1-matrix
* *indep-vars-1a-matrix
* :by
:row
) ; 1 long col
85 (defparameter *indep-vars-2-matrix
*
86 (transpose (make-matrix 2 (length iron
)
89 (mapcar #'(lambda (x) (coerce x
'double-float
))
91 (mapcar #'(lambda (x) (coerce x
'double-float
))
93 ;; *indep-vars-2-matrix*
96 (defparameter *indep-vars-2-matrix
*
97 (make-matrix (length iron
) 2
99 (mapcar #'(lambda (x y
)
100 (list (coerce x
'double-float
)
101 (coerce y
'double-float
)))
103 ;; *indep-vars-2-matrix*
106 ;; The below FAILS due to coercion issues; it just isn't lispy, it's R'y.
108 (defparameter *dep-var
* (make-vector (length absorbtion
)
109 :initial-contents
(list absorbtion
)))
111 ;; BUT below, this should be the right type.
112 (defparameter *dep-var
*
113 (make-vector (length absorbtion
)
117 (mapcar #'(lambda (x) (coerce x
'double-float
))
122 (defparameter *dep-var-int
*
123 (make-vector (length absorbtion
)
125 :element-type
'integer
126 :initial-contents
(list absorbtion
)))
128 (typep *dep-var
* 'matrix-like
) ; => T
129 (typep *dep-var
* 'vector-like
) ; => T
131 (typep *indep-vars-1-matrix
* 'matrix-like
) ; => T
132 (typep *indep-vars-1-matrix
* 'vector-like
) ; => T
133 (typep *indep-vars-2-matrix
* 'matrix-like
) ; => T
134 (typep *indep-vars-2-matrix
* 'vector-like
) ; => F
137 ;; following fails, need to ensure that we work on list elts, not just
138 ;; elts within a list:
139 ;; (coerce iron 'real)
141 ;; the following is a general list-conversion coercion approach -- is
142 ;; there a more efficient way?
144 ;; (mapcar #'(lambda (x) (coerce x 'double-float)) iron)
146 (princ "Data Set up"))
152 ;; REVIEW: general Lisp use guidance
154 (fdefinition 'make-matrix
)
155 (documentation 'make-matrix
'function
)
157 #| Examples from CLHS
, a bit of guidance.
159 ;; This function assumes its callers have checked the types of the
160 ;; arguments, and authorizes the compiler to build in that assumption.
161 (defun discriminant (a b c
)
162 (declare (number a b c
))
163 "Compute the discriminant for a quadratic equation."
164 (- (* b b
) (* 4 a c
))) => DISCRIMINANT
165 (discriminant 1 2/3 -
2) => 76/9
167 ;; This function assumes its callers have not checked the types of the
168 ;; arguments, and performs explicit type checks before making any assumptions.
169 (defun careful-discriminant (a b c
)
170 "Compute the discriminant for a quadratic equation."
171 (check-type a number
)
172 (check-type b number
)
173 (check-type c number
)
174 (locally (declare (number a b c
))
175 (- (* b b
) (* 4 a c
)))) => CAREFUL-DISCRIMINANT
176 (careful-discriminant 1 2/3 -
2) => 76/9
182 (progn ;; FIXME: Regression modeling
184 ;; data setup in previous FIXME
185 (defparameter *m
* nil
187 ;; need to make vectors and matrices from the lists...
190 (def *m
* (regression-model (list->vector-like iron
)
191 (list->vector-like absorbtion
)))
193 (def m
(regression-model (list->vector-like iron
)
194 (list->vector-like absorbtion
) :print nil
))
198 (send m
:own-methods
)
199 ;; (lsos::ls-objects-methods m) ; bogus?
202 (def m
(regression-model (list->vector-like iron
)
203 (list->vector-like absorbtion
)))
205 (def m
(regression-model (listoflists->matrix-like
(list iron aluminum
))
206 (list->vector-like absorbtion
) :print nil
))
210 (send m
:sweep-matrix
)
211 (format t
"~%~A~%" (send m
:sweep-matrix
))
213 ;; need to get multiple-linear regression working (simple linear regr
214 ;; works)... to do this, we need to redo the whole numeric structure,
215 ;; I'm keeping these in as example of brokenness...
217 (send m
:basis
) ;; this should be positive?
218 (send m
:coef-estimates
) )
221 (progn ;; FIXME: Need to clean up data examples, licenses, attributions, etc.
222 ;; The following breaks because we should use a package to hold
223 ;; configuration details, and this would be the only package outside
224 ;; of packages.lisp, as it holds the overall defsystem structure.
225 (load-data "iris.lsp") ;; (the above partially fixed).
232 ;; FIXME: Data.Frames probably deserve to be related to lists --
233 ;; either lists of cases, or lists of variables. We probably do not
234 ;; want to mix them, but want to be able to convert between such
237 (defparameter *my-case-data
*
241 (:case3 Y High
3.1 4))
242 (:var-names
(list "Response" "Level" "Pressure" "Size"))))
246 (elt *my-case-data
* 1)
247 (elt *my-case-data
* 0)
248 ;;(elt *my-case-data* 2) ;; error
249 (elt (elt *my-case-data
* 0) 1)
250 (elt (elt *my-case-data
* 0) 0)
251 (elt (elt (elt *my-case-data
* 0) 1) 0)
252 (elt (elt (elt *my-case-data
* 0) 1) 1)
253 (elt (elt *my-case-data
* 0) 2))
257 (progn ;; FIXME: read data from CSV file. To do.
259 ;; challenge is to ensure that we get mixed arrays when we want them,
260 ;; and single-type (simple) arrays in other cases.
262 (defparameter *csv-num
* (read-csv "Data/example-num.csv" :type
'numeric
))
263 (defparameter *csv-mix
* (read-csv "Data/example-mixed.csv" :type
'data
))
265 ;; The handling of these types should be compariable to what we do for
266 ;; matrices, but without the numerical processing. i.e. mref, bind2,
267 ;; make-dataframe, and the class structure should be similar.
269 ;; With numerical data, there should be a straightforward mapping from
270 ;; the data.frame to a matrix. With categorical data (including
271 ;; dense categories such as doc-strings, as well as sparse categories
272 ;; such as binary data), we need to include metadata about ordering,
273 ;; coding, and such. So the structures should probably consider
275 ;; Using the CSV file:
277 (asdf:oos
'asdf
:compile-op
'csv
:force t
)
278 (asdf:oos
'asdf
:load-op
'parse-number
)
279 (asdf:oos
'asdf
:load-op
'csv
)
280 (fare-csv:read-csv-file
"Data/example-numeric.csv")
282 ;; but I think the cl-csv package is broken, need to use the dsv-style
285 ;; now we've got the DSV code in the codebase, auto-loaded I hope:
286 cybertiggyr-dsv
:*field-separator
*
287 (defparameter *example-numeric.csv
*
288 (cybertiggyr-dsv:load-escaped
"Data/example-numeric.csv"
289 :field-separator
#\
,))
290 *example-numeric.csv
*
292 ;; the following fails because we've got a bit of string conversion
293 ;; to do. 2 thoughts: #1 modify dsv package, but mucking with
294 ;; encapsulation. #2 add a coercion tool (better, but potentially
296 #+nil
(coerce (nth 3 (nth 3 *example-numeric.csv
*)) 'double-float
)
298 ;; cases, simple to not so
299 (defparameter *test-string1
* "1.2")
300 (defparameter *test-string2
* " 1.2")
301 (defparameter *test-string3
* " 1.2 ")
306 (progn ;; experiments with GSL and the Lisp interface.
307 (asdf:oos
'asdf
:load-op
'gsll
)
308 (asdf:oos
'asdf
:load-op
'gsll-tests
)
310 ;; the following should be equivalent
311 (setf *t1
* (LIST 6.18d0
6.647777777777779d0
6.18d0
))
312 (setf *t2
* (MULTIPLE-VALUE-LIST
314 (gsll:make-marray
'DOUBLE-FLOAT
315 :INITIAL-CONTENTS
'(-3.21d0
1.0d0
12.8d0
)))
317 (gsll:MAKE-MARRAY
'DOUBLE-FLOAT
318 :INITIAL-CONTENTS
'(3.0d0
1.0d0
2.0d0
))))
319 (LET ((MEAN (gsll:MEAN VEC
)))
320 (LIST (gsll:ABSOLUTE-DEVIATION VEC
)
321 (gsll:WEIGHTED-ABSOLUTE-DEVIATION VEC WEIGHTS
)
322 (gsll:ABSOLUTE-DEVIATION VEC MEAN
))))))
325 ;; from (gsll:examples 'gsll::numerical-integration) ...
326 (gsll:integration-qng gsll
::one-sine
0.0d0 PI
)
328 (gsll:defun-single axpb
(x) (+ (* 2 x
) 3)) ;; a<-2, b<-3
329 (gsll:integration-qng axpb
1d0
2d0
)
333 (defun-single axpb2
(x) (+ (* a x
) b
)))
334 (gsll:integration-qng axpb2
1d0
2d0
)
337 ;; (gsll:integration-qng
340 ;; (defun-single axpb2 (x) (+ (* a x) b)))
343 ;; right, but weird expansion...
344 (gsll:integration-qng
347 (defun axpb2 (x) (+ (* a x
) b
))
348 (gsll:def-single-function axpb2
)
352 ;; Linear least squares
354 (gsll:gsl-lookup
"gsl_linalg_LU_decomp") ; => gsll:lu-decomposition
355 (gsll:gsl-lookup
"gsl_linalg_LU_solve") ; => gsll:lu-solve
361 (progn ;; philosophy time
363 (setf my-model
(model :name
"ex1"
364 :data-slots
(list x y z
)
365 :param-slots
(list alpha beta gamma
)
366 :math-form
(regression-model :formula
'(= y
(+ (* beta x
)
370 (setf my-dataset
(statistical-table :table data-frame-contents
371 :metadata
(list (:case-names
(list ))
373 (:documentation
"string of doc"))))
375 (setf my-analysis
(analysis
378 :parameter-map
(pairing (model-param-slots my-model
)
379 (data-var-names my-dataset
))))
381 ;; ontological implications -- the analysis is an abstract class of
382 ;; data, model, and mapping between the model and data. The fit is
383 ;; the instantiation of such. This provides a statistical object
384 ;; computation theory which can be realized as "executable
385 ;; statistics" or "computable statistics".
386 (setf my-analysis
(analyze my-fit
387 :estimation-method
'linear-least-squares-regression
))
389 ;; one of the tricks here is that one needs to provide the structure
390 ;; from which to consider estimation, and more importantly, the
391 ;; validity of the estimation.
394 (setf linear-least-squares-regression
395 (estimation-method-definition
396 :variable-defintions
((list
397 ;; from MachLearn: supervised,
399 :data-response-vars list-drv
; nil if unsup
402 :data-predictor-vars list-dpv
403 ;; nil in this case. these
404 ;; describe "out-of-box" specs
405 :hyper-vars list-hv
))
406 :form
'(regression-additive-error
407 :central-form
(linear-form drv pv dpv
)
408 :error-form
'normal-error
)
409 :resulting-decision
'(point-estimation interval-estimation
)
410 :philosophy
'frequentist
411 :documentation
"use least squares to fit a linear regression
414 (defparameter *statistical-philosophies
*
415 '(frequentist bayesian fiducial decision-analysis
)
416 "can be combined to build decision-making approaches and
419 (defparameter *decisions
*
420 '(estimation selection testing
)
421 "possible results from a...")
422 ;; is this really true? One can embedded hypothesis testing within
423 ;; estimation, as the hypothesis estimated to select. And
424 ;; categorical/continuous rear their ugly heads, but not really in
427 (defparameter *ontology-of-decision-procedures
*
431 (list :maximum-likelihood
436 (list :maximum-likelihood
442 :bioequivalence-inversion
)
447 :partially-parametric
))
448 "start of ontology"))
459 :initial-contents
'((1d0 2d0
3d0
4d0
5d0
6d0
7d0
8d0
))))
465 :initial-contents
'((1d0 1d0
)
474 (defparameter *xv
+1a
*
477 :initial-contents
#2A
((1d0 1d0
)
486 (defparameter *xv
+1b
*
491 :initial-contents
'((1d0)
501 (m= *xv
+1a
* *xv
+1b
*) ; => T
503 ;; so something like (NOTE: matrices are transposed to begin with, hence the incongruety)
504 (defparameter *xtx-2
* (m* (transpose *xv
+1*) *xv
+1*))
505 ;; #<LA-SIMPLE-MATRIX-DOUBLE 2 x 2
509 (defparameter *xty-2
* (m* (transpose *xv
+1*) (transpose *y
*)))
510 ;; #<LA-SIMPLE-VECTOR-DOUBLE (2 x 1)
514 (defparameter *rcond-2
* 0.000001)
515 (defparameter *betahat-2
* (gelsy *xtx-2
* *xty-2
* *rcond-2
*))
516 ;; *xtx-2* => "details of complete orthogonal factorization"
517 ;; according to man page:
518 ;; #<LA-SIMPLE-MATRIX-DOUBLE 2 x 2
519 ;; -119.33147112141039d0 -29.095426104883202d0
520 ;; 0.7873402682880205d0 -1.20672274167718d0>
522 ;; *xty-2* => output becomes solution:
523 ;; #<LA-SIMPLE-VECTOR-DOUBLE (2 x 1)
524 ;; -0.16666666666668312d0
525 ;; 1.333333333333337d0>
527 *betahat-2
* ; which matches R, see below
529 (documentation 'gelsy
'function
)
532 ;; (#<LA-SIMPLE-VECTOR-DOUBLE (2 x 1)
533 ;; -0.16666666666668312 1.333333333333337>
536 ;; ## Test case in R:
537 ;; x <- c( 1.0, 3.0, 2.0, 4.0, 3.0, 5.0, 4.0, 6.0)
538 ;; y <- c( 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0)
540 ;; ## => Call: lm(formula = y ~ x)
542 ;; Coefficients: (Intercept) x
549 ;; lm(formula = y ~ x)
552 ;; Min 1Q Median 3Q Max
553 ;; -1.833e+00 -6.667e-01 -3.886e-16 6.667e-01 1.833e+00
556 ;; Estimate Std. Error t value Pr(>|t|)
557 ;; (Intercept) -0.1667 1.1587 -0.144 0.89034
558 ;; x 1.3333 0.3043 4.382 0.00466 **
560 ;; Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
562 ;; Residual standard error: 1.291 on 6 degrees of freedom
563 ;; Multiple R-squared: 0.7619, Adjusted R-squared: 0.7222
564 ;; F-statistic: 19.2 on 1 and 6 DF, p-value: 0.004659
568 ;; which suggests one might do (modulo ensuring correct
569 ;; orientations). When this is finalized, it should migrate to
574 (defparameter *n
* 20) ; # rows = # obsns
575 (defparameter *p
* 10) ; # cols = # vars
576 (defparameter *x-temp
* (rand *n
* *p
*))
577 (defparameter *b-temp
* (rand *p
* 1))
578 (defparameter *y-temp
* (m* *x-temp
* *b-temp
*))
580 (defparameter *rcond
* (* (coerce (expt 2 -
52) 'double-float
)
581 (max (nrows *x-temp
*) (ncols *y-temp
*))))
582 (defparameter *orig-x
* (copy *x-temp
*))
583 (defparameter *orig-b
* (copy *b-temp
*))
584 (defparameter *orig-y
* (copy *y-temp
*))
586 (defparameter *lm-result
* (lm *x-temp
* *y-temp
*))
587 (princ (first *lm-result
*))
588 (princ (second *lm-result
*))
589 (princ (third *lm-result
*))
590 (v= (third *lm-result
*)
591 (v- (first (first *lm-result
*))
592 (first (second *lm-result
*)))))