Comment-doc -- at least for now, LISTS-OF-LISTS are ROW-MAJOR, NOT COLUMN-MAJOR AS...
[CommonLispStat.git] / TODO.lisp
blobf5ef222d24b9477bb9dd5e9356e5abcb02188ea7
1 ;;; -*- mode: lisp -*-
3 ;;; Time-stamp: <2009-01-11 17:10:57 tony>
4 ;;; Creation: <2008-09-08 08:06:30 tony>
5 ;;; File: TODO.lisp
6 ;;; Author: AJ Rossini <blindglobe@gmail.com>
7 ;;; Copyright: (c) 2007-2008, AJ Rossini <blindglobe@gmail.com>. BSD.
8 ;;; Purpose: Stuff that needs to be made working sits inside the progns...
10 ;;; What is this talk of 'release'? Klingons do not make software
11 ;;; 'releases'. Our software 'escapes', leaving a bloody trail of
12 ;;; designers and quality assurance people in its wake.
14 ;;; This file contains the current challenges to solve, including a
15 ;;; description of the setup and the work to solve....
17 ;;; SET UP
19 (in-package :cl-user)
20 ;;(asdf:oos 'asdf:compile-op 'lispstat)
21 ;;(asdf:oos 'asdf:load-op 'lispstat)
24 (in-package :lisp-stat-unittests)
26 ;; tests = 54, failures = 7, errors = 3
28 (describe (run-tests :suite 'lisp-stat-ut))
29 (run-tests :suite 'lisp-stat-ut)
31 (in-package :ls-user)
33 ;;; FIXME: Example: currently not relevant, yet
35 (describe
36 (lift::run-test
37 :test-case 'lisp-stat-unittests::create-proto
38 :suite 'lisp-stat-unittests::lisp-stat-ut-proto))
41 ;;; FIXME: data frames and structural inheritance
43 ;; Serious flaw -- need to consider that we are not really well
44 ;; working with the data structures, in that Luke created compound as
45 ;; a base class, which turns out to be slightly backward if we are to
46 ;; maintain the numerical structures as well as computational
47 ;; efficiency.
49 ;; Currently, we assume that the list-of-list representation is in
50 ;; row-major form, i.e. that lists represent rows and not columns.
51 ;; The original lisp-stat had it the other way around. We could augment
52 ;; the top-level list with a property to check orientation
53 ;; (row-major/column-major), but this hasn't been done yet.
55 #+nil
56 (progn ;; FIXME: Regression modeling
58 (defparameter m nil
59 "holding variable.")
60 ;; need to make vectors and matrices from the lists...
62 (def m (regression-model (list->vector-like iron)
63 (list->vector-like absorbtion) :print nil))
64 ;;Good
65 (send m :print)
66 (send m :own-slots)
67 (send m :own-methods)
68 ;; (lsos::ls-objects-methods m) ; bogus?
69 (send m :show)
71 (def m (regression-model (list->vector-like iron)
72 (list->vector-like absorbtion)))
74 (def m (regression-model (listoflists->matrix-like (list iron aluminum))
75 (list->vector-like absorbtion) :print nil))
78 (documentation 'make-matrix 'function)
80 ;; Making data-frames (i.e. cases (rows) by variables (columns))
81 ;; takes a bit of getting used to. For this, it is important to
82 ;; realize that we can do the following:
83 ;; #1 - consider the possibility of having a row, and transposing
84 ;; it, so the list-of-lists is: ((1 2 3 4 5)) (1 row, 5 columns)
85 ;; #2 - naturally list-of-lists: ((1)(2)(3)(4)(5)) (5 rows, 1 column)
86 (defparameter *indep-vars-1-matrix*
87 (transpose (make-matrix 1 (length iron)
88 :initial-contents
89 (list (mapcar #'(lambda (x) (coerce x 'double-float))
90 iron))))
91 "test param")
93 (documentation '*indep-vars-1-matrix* 'variable)
94 ;; *indep-vars-1-matrix*
96 ;; or directly:
97 (defparameter *indep-vars-1a-matrix*
98 (make-matrix (length iron) 1
99 :initial-contents
100 (mapcar #'(lambda (x) (list (coerce x 'double-float)))
101 iron)))
102 ;; *indep-vars-1a-matrix*
104 ;; and mathematically, they seem equal:
105 (m= *indep-vars-1-matrix* *indep-vars-1a-matrix*) ; => T
106 (eql *indep-vars-1-matrix* *indep-vars-1a-matrix*) ; => NIL
107 (eq *indep-vars-1-matrix* *indep-vars-1a-matrix*) ; => NIL
109 (print *indep-vars-1-matrix*)
110 (print *indep-vars-1a-matrix*)
112 ;; the weird way
113 (defparameter *indep-vars-2-matrix*
114 (transpose (make-matrix 2 (length iron)
115 :initial-contents
116 (list
117 (mapcar #'(lambda (x) (coerce x 'double-float))
118 iron)
119 (mapcar #'(lambda (x) (coerce x 'double-float))
120 aluminum)))))
121 ;; *indep-vars-2-matrix*
123 ;; the "right"? way
124 (defparameter *indep-vars-2-matrix*
125 (make-matrix (length iron) 2
126 :initial-contents
127 (mapcar #'(lambda (x y)
128 (list (coerce x 'double-float)
129 (coerce y 'double-float)))
130 iron aluminum)))
131 ;; *indep-vars-2-matrix*
(defun lists-of-same-size (&rest list-of-list-names)
  "Return T if every list in LIST-OF-LIST-NAMES has the same length, else NIL.

Used to justify further processing (e.g. combining the lists as the
variables of a data set).  Zero or one list trivially counts as
same-sized, and lists that are all empty have equal length as well."
  (let ((lengths (mapcar #'length list-of-list-names)))
    ;; Compare each length with its successor.  EVERY stops at the end
    ;; of the shorter sequence, so zero or one length is vacuously true;
    ;; no sentinel value is needed, so a common length of 0 is handled.
    (if (every #'= lengths (rest lengths))
        t
        nil)))
141 ;; (and T T nil T)
142 ;; (and T T T)
143 ;; (defparameter *x1* (list 1 2 3))
144 ;; (defparameter *x2* (list 1 2 3))
145 ;; (defparameter *x3* (list 1 2 3 4))
146 ;; (defparameter *x4* (list 1 2 3))
148 (reduce #'(lambda (x y)
149 (if (= x y) y -1))
150 (mapcar #'length (list *x1* *x2* *x3*)))
151 (reduce #'(lambda (x y)
152 (if (= x y) y -1)) (list 2 3 2))
154 ;; (lists-of-same-size *x1* *x2* *x4*) ; => T
155 ;; (lists-of-same-size *x1* *x3* *x4*) ; => F
156 ;; (lists-of-same-size *x1* *x2* *x3*) ; => F
157 ;; (lists-of-same-size *x3* *x1* *x3*) ; => F
(defmacro make-data-set-from-lists (datasetname
                                    &optional (force-overwrite nil)
                                    &rest lists-of-data-lists)
  "Create a cases-by-variables data frame consisting of numeric data.

DATASETNAME is an unevaluated symbol; it is defined with DEFPARAMETER.
FORCE-OVERWRITE, when true, allows redefining an already bound
DATASETNAME; otherwise an error is signalled if the name is bound.
LISTS-OF-DATA-LISTS are forms, each evaluating to a list holding one
variable (column); every element is coerced to DOUBLE-FLOAT.  All
lists must have the same length, otherwise an error is signalled.

The result is built with MAKE-MATRIX as (number of cases) rows by
\(number of lists) columns, i.e. row-major list-of-lists contents."
  ;; With no data lists there is nothing to build, and MAPCAR needs at
  ;; least one list, so reject this at macroexpansion time.
  (unless lists-of-data-lists
    (error "make-data-set-from-lists: no data lists supplied"))
  (let ((columns (gensym "COLUMNS")))
    ;; All checks run when the expansion is evaluated, inside the
    ;; DEFPARAMETER value form, so they see the actual list values and
    ;; the binding state at that time; an error leaves the name unset.
    `(defparameter ,datasetname
       (progn
         (when (and (boundp ',datasetname) (not ,force-overwrite))
           (error "make-data-set-from-lists: proposed name exists"))
         (let ((,columns (list ,@lists-of-data-lists)))
           (unless (apply #'lists-of-same-size ,columns)
             (error "make-data-set-from-lists: no combining different length lists"))
           (make-matrix (length (first ,columns)) (length ,columns)
                        :initial-contents
                        ;; Transpose the columns into one list per case.
                        (apply #'mapcar
                               #'(lambda (&rest case)
                                   (mapcar #'(lambda (x) (coerce x 'double-float))
                                           case))
                               ,columns)))))))
;; Inspect the expansion.  The form must be quoted (otherwise it is
;; evaluated rather than expanded), and FORCE-OVERWRITE is a positional
;; &optional argument, not a keyword.
(macroexpand '(make-data-set-from-lists
               this-data
               nil
               aluminum iron))
186 ;; The below FAILS due to coercion issues; it just isn't lispy, it's R'y.
188 (defparameter *dep-var* (make-vector (length absorbtion)
189 :initial-contents (list absorbtion)))
191 ;; BUT below, this should be the right type.
192 (defparameter *dep-var*
193 (make-vector (length absorbtion)
194 :type :row
195 :initial-contents
196 (list
197 (mapcar #'(lambda (x) (coerce x 'double-float))
198 absorbtion))))
199 ;; *dep-var*
202 (defparameter *dep-var-int*
203 (make-vector (length absorbtion)
204 :type :row
205 :element-type 'integer
206 :initial-contents (list absorbtion)))
208 (typep *dep-var* 'matrix-like) ; => T
209 (typep *dep-var* 'vector-like) ; => T
211 (typep *indep-vars-1-matrix* 'matrix-like) ; => T
212 (typep *indep-vars-1-matrix* 'vector-like) ; => T
213 (typep *indep-vars-2-matrix* 'matrix-like) ; => T
214 (typep *indep-vars-2-matrix* 'vector-like) ; => F
216 (def m1 (regression-model-new *indep-vars-1-matrix* *dep-var* ))
217 (def m2 (regression-model-new *indep-vars-2-matrix* *dep-var* ))
219 iron
220 ;; following fails, need to ensure that we work on list elts, not just
221 ;; elts within a list:
222 ;; (coerce iron 'real)
224 ;; the following is a general list-conversion coercion approach -- is
225 ;; there a more efficient way?
226 (mapcar #'(lambda (x) (coerce x 'double-float)) iron)
228 (coerce 1 'real)
230 (send m :compute)
231 (send m :sweep-matrix)
232 (format t "~%~A~%" (send m :sweep-matrix))
234 ;; need to get multiple-linear regression working (simple linear regr
235 ;; works)... to do this, we need to redo the whole numeric structure,
236 ;; I'm keeping these in as example of brokenness...
238 (send m :basis) ;; this should be positive?
239 (send m :coef-estimates) )
241 #+nil
242 (progn ;; FIXME: Need to clean up data examples, licenses, attributions, etc.
243 ;; The following breaks because we should use a package to hold
244 ;; configuration details, and this would be the only package outside
245 ;; of packages.lisp, as it holds the overall defsystem structure.
246 (load-data "iris.lsp") ;; (the above partially fixed).
247 (variables)
248 diabetes )
250 #+nil
251 (progn ;; FIXME: Data.Frames probably deserve to be related to lists --
252 ;; either lists of cases, or lists of variables. We probably do not
253 ;; want to mix them, but want to be able to convert between such
254 ;; structures.
256 (defparameter *my-case-data*
257 '((:cases
258 (:case1 Y Med 3.4 5)
259 (:case2 N Low 3.2 3)
260 (:case3 Y High 3.1 4))
261 (:var-names (list "Response" "Level" "Pressure" "Size"))))
263 *my-case-data*
265 (elt *my-case-data* 1)
266 (elt *my-case-data* 0)
267 (elt *my-case-data* 2) ;; error
268 (elt (elt *my-case-data* 0) 1)
269 (elt (elt *my-case-data* 0) 0)
270 (elt (elt (elt *my-case-data* 0) 1) 0)
271 (elt (elt (elt *my-case-data* 0) 1) 1)
272 (elt (elt (elt *my-case-data* 0) 1) 2)
273 (elt (elt *my-case-data* 0) 3))
275 #+nil
276 (progn ;; FIXME: read data from CSV file. To do.
278 ;; challenge is to ensure that we get mixed arrays when we want them,
279 ;; and single-type (simple) arrays in other cases.
281 (defparameter *csv-num* (read-csv "Data/example-num.csv" :type 'numeric))
282 (defparameter *csv-mix* (read-csv "Data/example-mixed.csv" :type 'data))
284 ;; The handling of these types should be comparable to what we do for
285 ;; matrices, but without the numerical processing. i.e. mref, bind2,
286 ;; make-dataframe, and the class structure should be similar.
288 ;; With numerical data, there should be a straightforward mapping from
289 ;; the data.frame to a matrix. With categorical data (including
290 ;; dense categories such as doc-strings, as well as sparse categories
291 ;; such as binary data), we need to include metadata about ordering,
292 ;; coding, and such. So the structures should probably consider
294 ;; Using the CSV file:
296 (asdf:oos 'asdf:compile-op 'csv :force t)
297 (asdf:oos 'asdf:load-op 'parse-number)
298 (asdf:oos 'asdf:load-op 'csv)
299 (fare-csv:read-csv-file "Data/example-numeric.csv")
301 ;; but I think the cl-csv package is broken, need to use the dsv-style
302 ;; package.
304 ;; now we've got the DSV code in the codebase, auto-loaded I hope:
305 cybertiggyr-dsv:*field-separator*
306 (defparameter *example-numeric.csv*
307 (cybertiggyr-dsv:load-escaped "Data/example-numeric.csv"
308 :field-separator #\,))
309 *example-numeric.csv*
311 ;; the following fails because we've got a bit of string conversion
312 ;; to do. 2 thoughts: #1 modify dsv package, but mucking with
313 ;; encapsulation. #2 add a coercion tool (better, but potentially
314 ;; inefficient).
315 #+nil(coerce (nth 3 (nth 3 *example-numeric.csv*)) 'double-float)
317 ;; cases, simple to not so
318 (defparameter *test-string1* "1.2")
319 (defparameter *test-string2* " 1.2")
320 (defparameter *test-string3* " 1.2 ")
326 #+nil
327 (progn ;; experiments with GSL and the Lisp interface.
328 (asdf:oos 'asdf:load-op 'gsll)
329 (asdf:oos 'asdf:load-op 'gsll-tests)
331 ;; the following should be equivalent
332 (setf *t1* (LIST 6.18d0 6.647777777777779d0 6.18d0))
333 (setf *t2* (MULTIPLE-VALUE-LIST
334 (LET ((VEC
335 (gsll:make-marray 'DOUBLE-FLOAT
336 :INITIAL-CONTENTS '(-3.21d0 1.0d0 12.8d0)))
337 (WEIGHTS
338 (gsll:MAKE-MARRAY 'DOUBLE-FLOAT
339 :INITIAL-CONTENTS '(3.0d0 1.0d0 2.0d0))))
340 (LET ((MEAN (gsll:MEAN VEC)))
341 (LIST (gsll:ABSOLUTE-DEVIATION VEC)
342 (gsll:WEIGHTED-ABSOLUTE-DEVIATION VEC WEIGHTS)
343 (gsll:ABSOLUTE-DEVIATION VEC MEAN))))))
344 (eql *t1* *t2*)
346 ;; from (gsll:examples 'gsll::numerical-integration) ...
347 (gsll:integration-qng gsll::one-sine 0.0d0 PI)
350 (defun-single axpb (x) (+ (* 2 x) 3)) ;; a<-2, b<-3
351 (gsll:integration-qng axpb 1d0 2d0)
353 (let ((a 2)
354 (b 3))
355 (defun-single axpb2 (x) (+ (* a x) b)))
356 (gsll:integration-qng axpb2 1d0 2d0)
359 #| BAD
360 (gsll:integration-qng
361 (let ((a 2)
362 (b 3))
363 (defun-single axpb2 (x) (+ (* a x) b)))
364 1d0 2d0)
367 ;; right, but weird expansion...
368 (gsll:integration-qng
369 (let ((a 2)
370 (b 3))
371 (defun axpb2 (x) (+ (* a x) b))
372 (def-single-function axpb2)
373 axpb2)
374 1d0 2d0)
381 #+nil
382 (progn ;; philosophy time
384 (setf my-model (model :name "ex1"
385 :data-slots (list x y z)
386 :param-slots (list alpha beta gamma)
387 :math-form (regression-model :formula '(= y (+ (* beta x)
388 (* alpha y)
389 (* gamma z)
390 normal-error)))))
391 (setf my-dataset (statistical-table :table data-frame-contents
392 :metadata (list (:case-names (list ))
393 (:var-names (list ))
394 (:documentation "string of doc"))))
396 (setf my-analysis (analysis
397 :model my-model
398 :data my-dataset
399 :parameter-map (pairing (model-param-slots my-model)
400 (data-var-names my-dataset))))
402 ;; ontological implications -- the analysis is an abstract class of
403 ;; data, model, and mapping between the model and data. The fit is
404 ;; the instantiation of such. This provides a statistical object
405 ;; computation theory which can be realized as "executable
406 ;; statistics" or "computable statistics".
407 (setf my-analysis (analyze my-fit
408 :estimation-method 'linear-least-squares-regression))
410 ;; one of the tricks here is that one needs to provide the structure
411 ;; from which to consider estimation, and more importantly, the
412 ;; validity of the estimation.
415 (setf linear-least-squares-regression
416 (estimation-method-definition
417 :variable-defintions ((list
418 ;; from MachLearn: supervised,
419 ;; unsupervised
420 :data-response-vars list-drv ; nil if unsup
422 :param-vars list-pv
423 :data-predictor-vars list-dpv
424 ;; nil in this case. these
425 ;; describe "out-of-box" specs
426 :hyper-vars list-hv))
427 :form '(regression-additive-error
428 :central-form (linear-form drv pv dpv)
429 :error-form 'normal-error)
430 :resulting-decision '(point-estimation interval-estimation)
431 :philosophy 'frequentist
432 :documentation "use least squares to fit a linear regression
433 model to data."))
435 (defparameter *statistical-philosophies*
436 '(frequentist bayesian fiducial decision-analysis)
437 "can be combined to build decision-making approaches and
438 characterizations")
440 (defparameter *decisions*
441 '(estimation selection testing)
442 "possible results from a...")
443 ;; is this really true? One can embed hypothesis testing within
444 ;; estimation, as the hypothesis estimated to select. And
445 ;; categorical/continuous rear their ugly heads, but not really in
446 ;; an essential way.
448 (defparameter *ontology-of-decision-procedures*
449 (list :decisions
450 (list :estimation
451 (list :point
452 (list :maximum-likelihood
453 :minimum-entropy
454 :least-squares
455 :method-of-moments)
456 :interval
457 (list :maximum-likelihood
459 :testing
460 (list :fisherian
461 :neyman-pearson
462 (list :traditional
463 :bioequivalence-inversion)
464 :selection
465 (list :ranking
466 :top-k-of-n-select))
467 :parametric
468 :partially-parametric))
469 "start of ontology")
479 #+nil
480 (progn ;;; QR factorization
481 ;; Need to incorporate the xGEQRF routines, to support linear
482 ;; regression work.
484 ;; Some issues exist in the LAPACK vs. LINPACK variants, hence R
485 ;; uses LINPACK primarily, rather than LAPACK. See comments in R
486 ;; source for issues.
488 ;; LAPACK suggests using the xGELSY driver (GE = general matrix, LS =
489 ;; least squares; need to look up what the Y stands for -- it used to
490 ;; be an X algorithm, see release notes).
492 ;; Goal is to start from X, Y and then realize that if
493 ;; Y = X \beta + \epsilon, i.e. (8x1) = (8xp)(px1) + (8x1), then
494 ;; XtX \hat\beta = Xt Y
495 ;; so that we can solve the equation W \beta = Z where W and Z
496 ;; are known, to estimate \beta.
497 (defparameter *xv*
498 (make-vector
500 :initial-contents '((1d0 3d0 2d0 4d0 3d0 5d0 4d0 6d0))))
502 (defparameter *xv+1*
503 (make-matrix
505 :initial-contents '((1d0 1d0)
506 (1d0 3d0)
507 (1d0 2d0)
508 (1d0 4d0)
509 (1d0 3d0)
510 (1d0 5d0)
511 (1d0 4d0)
512 (1d0 6d0))))
514 (defparameter *xm*
515 (make-matrix
517 :initial-contents '((1d0 3d0 2d0 4d0 3d0 5d0 4d0 6d0)
518 (1d0 2d0 3d0 4d0 5d0 6d0 7d0 8d0))))
520 (defparameter *y*
521 (make-vector
523 :initial-contents '((1d0 2d0 3d0 4d0 5d0 6d0 7d0 8d0))))
525 ;; so something like (NOTE: matrices are transposed to begin with, hence the incongruity)
526 (defparameter *xtx* (m* *xv* (transpose *xv*)))
527 (defparameter *xty* (m* *xv* (transpose *y*)))
528 (defparameter *rcond* 1)
529 (defparameter *betahat* (gelsy *xtx* *xty* *rcond*))
530 *betahat*
533 (#<LA-SIMPLE-VECTOR-DOUBLE (1 x 1)
534 1.293103448275862>
537 ## Test case in R:
538 x <- c( 1.0, 3.0, 2.0, 4.0, 3.0, 5.0, 4.0, 6.0)
539 y <- c( 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0)
540 lm (y ~ x -1)
541 ## =>
542 Call:
543 lm(formula = y ~ x - 1)
545 Coefficients:
547 1.293
551 ;; so something like (NOTE: matrices are transposed to begin with, hence the incongruity)
552 (defparameter *xtx* (m* *xv+1* (transpose *xv+1*)))
553 (defparameter *xty* (m* *xv+1* (transpose *y*)))
554 (defparameter *rcond* 1)
555 (defparameter *betahat* (gelsy *xtx* *xty* *rcond*))
556 *betahat*
560 ;; which suggests one might do (modulo ensuring correct orientations)
(defun lm (x y &optional (rcond 1))
  "Sketch of a least-squares fit: solve (X Xt) beta = (X Yt) with GELSY.

X and Y are passed in the transposed orientation used by the examples
in this file (hence X * transpose(X) rather than transpose(X) * X).
RCOND is handed to GELSY as its third argument, matching the other
GELSY calls in this file; it defaults to 1, the value those calls use.

Returns two values: the GELSY result, and the result of SEBETAHAT
applied to it together with X and Y."
  ;; NOTE(review): SEBETAHAT is not defined in the visible source;
  ;; presumably it is meant to compute standard errors -- confirm.
  (let ((betahat (gelsy (m* x (transpose x))
                        (m* x (transpose y))
                        rcond)))
    (values betahat (sebetahat betahat x y))))
566 ;; to get a results list containing betahat and SEs
568 (values-list '(1 3 4))