3 ;;; Time-stamp: <2009-01-11 17:10:57 tony>
4 ;;; Creation: <2008-09-08 08:06:30 tony>
6 ;;; Author: AJ Rossini <blindglobe@gmail.com>
7 ;;; Copyright: (c) 2007-2008, AJ Rossini <blindglobe@gmail.com>. BSD.
8 ;;; Purpose: Stuff that needs to be made working sits inside the progns...
10 ;;; What is this talk of 'release'? Klingons do not make software
11 ;;; 'releases'. Our software 'escapes', leaving a bloody trail of
12 ;;; designers and quality assurance people in its wake.
14 ;;; This file contains the current challenges to solve, including a
15 ;;; description of the setup and the work to solve....
20 ;;(asdf:oos 'asdf:compile-op 'lispstat)
21 ;;(asdf:oos 'asdf:load-op 'lispstat)
24 (in-package :lisp-stat-unittests
)
26 ;; tests = 54, failures = 7, errors = 3
28 (describe (run-tests :suite
'lisp-stat-ut
))
29 (run-tests :suite
'lisp-stat-ut
)
33 ;;; FIXME: Example: currently not relevant, yet
37 :test-case
'lisp-stat-unittests
::create-proto
38 :suite
'lisp-stat-unittests
::lisp-stat-ut-proto
))
41 :;; FIXME: data frames and structural inheritance
43 ;; Serious flaw -- need to consider that we are not really well
44 ;; working with the data structures, in that Luke created compound as
45 ;; a base class, which turns out to be slightly backward if we are to
46 ;; maintain the numerical structures as well as computational
51 (progn ;; FIXME: Regression modeling
55 ;; need to make vectors and matrices from the lists...
57 (def m
(regression-model (list->vector-like iron
)
58 (list->vector-like absorbtion
) :print nil
))
63 ;; (lsos::ls-objects-methods m) ; bogus?
66 (def m
(regression-model (list->vector-like iron
)
67 (list->vector-like absorbtion
)))
69 (def m
(regression-model (listoflists->matrix-like
(list iron aluminum
))
70 (list->vector-like absorbtion
) :print nil
))
73 (documentation 'make-matrix
'function
)
75 ;; Making data-frames (i.e. cases (rows) by variables (columns))
76 ;; takes a bit of getting used to. For this, it is important to
77 ;; realize that we can do the following:
78 ;; #1 - consider the possibility of having a row, and transposing
79 ;; it, so the list-of-lists is: ((1 2 3 4 5)) (1 row, 5 columns)
80 ;; #2 - naturally list-of-lists: ((1)(2)(3)(4)(5)) (5 rows, 1 column)
81 (defparameter *indep-vars-1-matrix
*
82 (transpose (make-matrix 1 (length iron
)
84 (list (mapcar #'(lambda (x) (coerce x
'double-float
))
88 (documentation '*indep-vars-1-matrix
* 'variable
)
89 ;; *indep-vars-1-matrix*
92 (defparameter *indep-vars-1a-matrix
*
93 (make-matrix (length iron
) 1
95 (mapcar #'(lambda (x) (list (coerce x
'double-float
)))
97 ;; *indep-vars-1a-matrix*
99 ;; and mathematically, they seem equal:
100 (m= *indep-vars-1-matrix
* *indep-vars-1a-matrix
*) ; => T
101 (eql *indep-vars-1-matrix
* *indep-vars-1a-matrix
*) ; => NIL
102 (eq *indep-vars-1-matrix
* *indep-vars-1a-matrix
*) ; => NIL
104 (print *indep-vars-1-matrix
*)
105 (print *indep-vars-1a-matrix
*)
108 (defparameter *indep-vars-2-matrix
*
109 (transpose (make-matrix 2 (length iron
)
112 (mapcar #'(lambda (x) (coerce x
'double-float
))
114 (mapcar #'(lambda (x) (coerce x
'double-float
))
116 ;; *indep-vars-2-matrix*
119 (defparameter *indep-vars-2-matrix
*
120 (make-matrix (length iron
) 2
122 (mapcar #'(lambda (x y
)
123 (list (coerce x
'double-float
)
124 (coerce y
'double-float
)))
126 ;; *indep-vars-2-matrix*
128 (defun lists-of-same-size (&rest list-of-list-names
)
129 "Check to see if the lengths of the lists are equal, to justify
130 further processing and initial conditions."
131 (if (< 0 (reduce #'(lambda (x y
) (if (= x y
) y -
1))
132 (mapcar #'length list-of-list-names
)))
138 ;; (defparameter *x1* (list 1 2 3))
139 ;; (defparameter *x2* (list 1 2 3))
140 ;; (defparameter *x3* (list 1 2 3 4))
141 ;; (defparameter *x4* (list 1 2 3))
143 (reduce #'(lambda (x y
)
145 (mapcar #'length
(list *x1
* *x2
* *x3
*)))
146 (reduce #'(lambda (x y
)
147 (if (= x y
) y -
1)) (list 2 3 2))
149 ;; (lists-of-same-size *x1* *x2* *x4*) ; => T
150 ;; (lists-of-same-size *x1* *x3* *x4*) ; => F
151 ;; (lists-of-same-size *x1* *x2* *x3*) ; => F
152 ;; (lists-of-same-size *x3* *x1* *x3*) ; => F
156 (defmacro make-data-set-from-lists
(datasetname
157 &optional
(force-overwrite nil
)
158 &rest lists-of-data-lists
)
159 "Create a cases-by-variables data frame consisting of numeric data."
160 (if (or (not (boundp datasetname
))
162 (if (lists-of-same-size lists-of-data-lists
)
163 `(defparameter ,datasetname
164 (make-matrix (length iron
) 2
166 (mapcar #'(lambda (x y
)
167 (list (coerce x
'double-float
)
168 (coerce y
'double-float
)))
169 @lists-of-data-lists
)))
170 (error "make-data-set-from-lists: no combining different length lists"))
171 (error "make-data-set-from-lists: proposed name exists")))
173 (macroexpand (make-data-set-from-lists
181 ;; The below FAILS due to coercion issues; it just isn't lispy, it's R'y.
183 (defparameter *dep-var
* (make-vector (length absorbtion
)
184 :initial-contents
(list absorbtion
)))
186 ;; BUT below, this should be the right type.
187 (defparameter *dep-var
*
188 (make-vector (length absorbtion
)
192 (mapcar #'(lambda (x) (coerce x
'double-float
))
197 (defparameter *dep-var-int
*
198 (make-vector (length absorbtion
)
200 :element-type
'integer
201 :initial-contents
(list absorbtion
)))
203 (typep *dep-var
* 'matrix-like
) ; => T
204 (typep *dep-var
* 'vector-like
) ; => T
206 (typep *indep-vars-1-matrix
* 'matrix-like
) ; => T
207 (typep *indep-vars-1-matrix
* 'vector-like
) ; => T
208 (typep *indep-vars-2-matrix
* 'matrix-like
) ; => T
209 (typep *indep-vars-2-matrix
* 'vector-like
) ; => F
211 (def m1
(regression-model-new *indep-vars-1-matrix
* *dep-var
* ))
212 (def m2
(regression-model-new *indep-vars-2-matrix
* *dep-var
* ))
215 ;; following fails, need to ensure that we work on list elts, not just
216 ;; elts within a list:
217 ;; (coerce iron 'real)
219 ;; the following is a general list-conversion coercion approach -- is
220 ;; there a more efficient way?
221 (mapcar #'(lambda (x) (coerce x
'double-float
)) iron
)
226 (send m
:sweep-matrix
)
227 (format t
"~%~A~%" (send m
:sweep-matrix
))
229 ;; need to get multiple-linear regression working (simple linear regr
230 ;; works)... to do this, we need to redo the whole numeric structure,
231 ;; I'm keeping these in as example of brokenness...
233 (send m
:basis
) ;; this should be positive?
234 (send m
:coef-estimates
) )
237 (progn ;; FIXME: Need to clean up data examples, licenses, attributions, etc.
238 ;; The following breaks because we should use a package to hold
239 ;; configuration details, and this would be the only package outside
240 ;; of packages.lisp, as it holds the overall defsystem structure.
241 (load-data "iris.lsp") ;; (the above partially fixed).
246 (progn ;; FIXME: Data.Frames probably deserve to be related to lists --
247 ;; either lists of cases, or lists of variables. We probably do not
248 ;; want to mix them, but want to be able to convert between such
251 (defparameter *my-case-data
*
255 (:case3 Y High
3.1 4))
256 (:var-names
(list "Response" "Level" "Pressure" "Size"))))
260 (elt *my-case-data
* 1)
261 (elt *my-case-data
* 0)
262 (elt *my-case-data
* 2) ;; error
263 (elt (elt *my-case-data
* 0) 1)
264 (elt (elt *my-case-data
* 0) 0)
265 (elt (elt (elt *my-case-data
* 0) 1) 0)
266 (elt (elt (elt *my-case-data
* 0) 1) 1)
267 (elt (elt (elt *my-case-data
* 0) 1) 2)
268 (elt (elt *my-case-data
* 0) 3))
271 (progn ;; FIXME: read data from CSV file. To do.
273 ;; challenge is to ensure that we get mixed arrays when we want them,
274 ;; and single-type (simple) arrays in other cases.
276 (defparameter *csv-num
* (read-csv "Data/example-num.csv" :type
'numeric
))
277 (defparameter *csv-mix
* (read-csv "Data/example-mixed.csv" :type
'data
))
279 ;; The handling of these types should be compariable to what we do for
280 ;; matrices, but without the numerical processing. i.e. mref, bind2,
281 ;; make-dataframe, and the class structure should be similar.
283 ;; With numerical data, there should be a straightforward mapping from
284 ;; the data.frame to a matrix. With categorical data (including
285 ;; dense categories such as doc-strings, as well as sparse categories
286 ;; such as binary data), we need to include metadata about ordering,
287 ;; coding, and such. So the structures should probably consider
289 ;; Using the CSV file:
291 (asdf:oos
'asdf
:compile-op
'csv
:force t
)
292 (asdf:oos
'asdf
:load-op
'parse-number
)
293 (asdf:oos
'asdf
:load-op
'csv
)
294 (fare-csv:read-csv-file
"Data/example-numeric.csv")
296 ;; but I think the cl-csv package is broken, need to use the dsv-style
299 ;; now we've got the DSV code in the codebase, auto-loaded I hope:
300 cybertiggyr-dsv
:*field-separator
*
301 (defparameter *example-numeric.csv
*
302 (cybertiggyr-dsv:load-escaped
"Data/example-numeric.csv"
303 :field-separator
#\
,))
304 *example-numeric.csv
*
306 ;; the following fails because we've got a bit of string conversion
307 ;; to do. 2 thoughts: #1 modify dsv package, but mucking with
308 ;; encapsulation. #2 add a coercion tool (better, but potentially
310 #+nil
(coerce (nth 3 (nth 3 *example-numeric.csv
*)) 'double-float
)
312 ;; cases, simple to not so
313 (defparameter *test-string1
* "1.2")
314 (defparameter *test-string2
* " 1.2")
315 (defparameter *test-string3
* " 1.2 ")
322 (progn ;; experiments with GSL and the Lisp interface.
323 (asdf:oos
'asdf
:load-op
'gsll
)
324 (asdf:oos
'asdf
:load-op
'gsll-tests
)
326 ;; the following should be equivalent
327 (setf *t1
* (LIST 6.18d0
6.647777777777779d0
6.18d0
))
328 (setf *t2
* (MULTIPLE-VALUE-LIST
330 (gsll:make-marray
'DOUBLE-FLOAT
331 :INITIAL-CONTENTS
'(-3.21d0
1.0d0
12.8d0
)))
333 (gsll:MAKE-MARRAY
'DOUBLE-FLOAT
334 :INITIAL-CONTENTS
'(3.0d0
1.0d0
2.0d0
))))
335 (LET ((MEAN (gsll:MEAN VEC
)))
336 (LIST (gsll:ABSOLUTE-DEVIATION VEC
)
337 (gsll:WEIGHTED-ABSOLUTE-DEVIATION VEC WEIGHTS
)
338 (gsll:ABSOLUTE-DEVIATION VEC MEAN
))))))
341 ;; from (gsll:examples 'gsll::numerical-integration) ...
342 (gsll:integration-qng gsll
::one-sine
0.0d0 PI
)
345 (defun-single axpb
(x) (+ (* 2 x
) 3)) ;; a<-2, b<-3
346 (gsll:integration-qng axpb
1d0
2d0
)
350 (defun-single axpb2
(x) (+ (* a x
) b
)))
351 (gsll:integration-qng axpb2
1d0
2d0
)
355 (gsll:integration-qng
358 (defun-single axpb2
(x) (+ (* a x
) b
)))
362 ;; right, but weird expansion...
363 (gsll:integration-qng
366 (defun axpb2 (x) (+ (* a x
) b
))
367 (def-single-function axpb2
)
377 (progn ;; philosophy time
379 (setf my-model
(model :name
"ex1"
380 :data-slots
(list x y z
)
381 :param-slots
(list alpha beta gamma
)
382 :math-form
(regression-model :formula
'(= y
(+ (* beta x
)
386 (setf my-dataset
(statistical-table :table data-frame-contents
387 :metadata
(list (:case-names
(list ))
389 (:documentation
"string of doc"))))
391 (setf my-analysis
(analysis
394 :parameter-map
(pairing (model-param-slots my-model
)
395 (data-var-names my-dataset
))))
397 ;; ontological implications -- the analysis is an abstract class of
398 ;; data, model, and mapping between the model and data. The fit is
399 ;; the instantiation of such. This provides a statistical object
400 ;; computation theory which can be realized as "executable
401 ;; statistics" or "computable statistics".
402 (setf my-analysis
(analyze my-fit
403 :estimation-method
'linear-least-squares-regression
))
405 ;; one of the tricks here is that one needs to provide the structure
406 ;; from which to consider estimation, and more importantly, the
407 ;; validity of the estimation.
410 (setf linear-least-squares-regression
411 (estimation-method-definition
412 :variable-defintions
((list
413 ;; from MachLearn: supervised,
415 :data-response-vars list-drv
; nil if unsup
418 :data-predictor-vars list-dpv
419 ;; nil in this case. these
420 ;; describe "out-of-box" specs
421 :hyper-vars list-hv
))
422 :form
'(regression-additive-error
423 :central-form
(linear-form drv pv dpv
)
424 :error-form
'normal-error
)
425 :resulting-decision
'(point-estimation interval-estimation
)
426 :philosophy
'frequentist
427 :documentation
"use least squares to fit a linear regression
430 (defparameter *statistical-philosophies
*
431 '(frequentist bayesian fiducial decision-analysis
)
432 "can be combined to build decision-making approaches and
435 (defparameter *decisions
*
436 '(estimation selection testing
)
437 "possible results from a...")
438 ;; is this really true? One can embedded hypothesis testing within
439 ;; estimation, as the hypothesis estimated to select. And
440 ;; categorical/continuous rear their ugly heads, but not really in
443 (defparameter *ontology-of-decision-procedures
*
447 (list :maximum-likelihood
452 (list :maximum-likelihood
458 :bioequivalence-inversion
)
463 :partially-parametric
))
475 (progn ;;; QR factorization
476 ;; Need to incorporate the xGEQRF routines, to support linear
479 ;; Some issues exist in the LAPACK vs. LINPACK variants, hence R
480 ;; uses LINPACK primarily, rather than LAPACK. See comments in R
481 ;; source for issues.
483 ;; LAPACK suggests to use the xGELSY driver (GE general matrix, LS
484 ;; least squares, need to lookup Y intent (used to be an X alg, see
487 ;; Goal is to start from X, Y and then realize that if
488 ;; Y = X \beta, then, i.e. 8x1 = 8xp px1 + 8x1
489 ;; XtX \hat\beta = Xt Y
490 ;; so that we can solve the equation W \beta = Z where W and Z
491 ;; are known, to estimate \beta.
495 :initial-contents
'((1d0 3d0
2d0
4d0
3d0
5d0
4d0
6d0
))))
500 :initial-contents
'((1d0 1d0
)
512 :initial-contents
'((1d0 3d0
2d0
4d0
3d0
5d0
4d0
6d0
)
513 (1d0 2d0
3d0
4d0
5d0
6d0
7d0
8d0
))))
518 :initial-contents
'((1d0 2d0
3d0
4d0
5d0
6d0
7d0
8d0
))))
520 ;; so something like (NOTE: matrices are transposed to begin with, hence the incongruety)
521 (defparameter *xtx
* (m* *xv
* (transpose *xv
*)))
522 (defparameter *xty
* (m* *xv
* (transpose *y
*)))
523 (defparameter *rcond
* 1)
524 (defparameter *betahat
* (gelsy *xtx
* *xty
* *rcond
*))
528 (#<LA-SIMPLE-VECTOR-DOUBLE
(1 x
1)
533 x
<- c
( 1.0, 3.0, 2.0, 4.0, 3.0, 5.0, 4.0, 6.0)
534 y
<- c
( 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0)
538 lm
(formula = y ~ x -
1)
546 ;; so something like (NOTE: matrices are transposed to begin with, hence the incongruety)
547 (defparameter *xtx
* (m* *xv
+1* (transpose *xv
+1*)))
548 (defparameter *xty
* (m* *xv
+1* (transpose *y
*)))
549 (defparameter *rcond
* 1)
550 (defparameter *betahat
* (gelsy *xtx
* *xty
* *rcond
*))
555 ;; which suggests one might do (modulo ensuring correct orientations)
557 (let ((betahat (gelsy (m* x
(transpose x
))
558 (m* x
(transpose y
)))))
560 (values betahat
(sebetahat betahat x y
))))
561 ;; to get a results list containing betahat and SEs
563 (values-list '(1 3 4))