move lin regr example away until we get it solved for lisp-matrix.
[CommonLispStat.git] / TODO.lisp
blob2a7616c80d553a73538231a7eb2de1a0c61de138
1 ;;; -*- mode: lisp -*-
3 ;;; Time-stamp: <2009-01-22 17:02:09 tony>
4 ;;; Creation: <2008-09-08 08:06:30 tony>
5 ;;; File: TODO.lisp
6 ;;; Author: AJ Rossini <blindglobe@gmail.com>
7 ;;; Copyright: (c) 2007-2008, AJ Rossini <blindglobe@gmail.com>. BSD.
8 ;;; Purpose: Stuff that needs to be made working sits inside the progns...
10 ;;; What is this talk of 'release'? Klingons do not make software
11 ;;; 'releases'. Our software 'escapes', leaving a bloody trail of
12 ;;; designers and quality assurance people in its wake.
14 ;;; This file contains the current challenges to solve, including a
15 ;;; description of the setup and the work to solve....
17 ;;; SET UP
;;; Package bootstrap: start in CL-USER.  The ASDF compile/load calls
;;; are kept for reference only; they are commented out.
(in-package :cl-user)
;; (asdf:oos 'asdf:compile-op 'lispstat)
;; (asdf:oos 'asdf:load-op 'lispstat)

;;; Switch to the unit-test package and exercise the LISP-STAT-UT suite.
(in-package :lisp-stat-unittests)

;; Recorded outcome: tests = 54, failures = 7, errors = 3
;; The suite is run twice: first with the result handed to DESCRIBE,
;; then with the raw result returned to the listener.
(describe (run-tests :suite 'lisp-stat-ut))
(run-tests :suite 'lisp-stat-ut)

;; FIXME: Example: currently not relevant, yet
;; Runs the single CREATE-PROTO test case of the LISP-STAT-UT-PROTO
;; suite and describes whatever RUN-TEST returns.
(describe
 (lift::run-test :suite 'lisp-stat-unittests::lisp-stat-ut-proto
                 :test-case 'lisp-stat-unittests::create-proto))
(in-package :ls-user)

(progn ;; Data setup

  ;; Builds matrix and vector objects from the variables IRON,
  ;; ALUMINUM and ABSORBTION.  Those variables are referenced here but
  ;; not defined in this section; they are passed to LENGTH and MAPCAR,
  ;; so they are treated as lists.

  ;; Making data-frames (i.e. cases (rows) by variables (columns))
  ;; takes a bit of getting used to.  For this, it is important to
  ;; realize that we can do the following:
  ;; #1 - consider the possibility of having a row, and transposing
  ;;      it, so the list-of-lists is: ((1 2 3 4 5))   (1 row, 5 columns)
  ;; #2 - naturally list-of-lists: ((1)(2)(3)(4)(5))   (5 rows, 1 column)
  ;; see src/data/listoflist.lisp for code to process this particular
  ;; data structure.

  ;; Approach #1: one row of doubles, then transpose.
  (defparameter *indep-vars-1-matrix*
    (transpose (make-matrix 1 (length iron)
                            :initial-contents
                            (list (mapcar #'(lambda (x) (coerce x 'double-float))
                                          iron))))
    "creating iron into double float, straightforward")

  (documentation '*indep-vars-1-matrix* 'variable)
  ;; *indep-vars-1-matrix*

  ;; or directly (approach #2: one single-element row per case):
  (defparameter *indep-vars-1a-matrix*
    (make-matrix (length iron) 1
                 :initial-contents
                 (mapcar #'(lambda (x) (list (coerce x 'double-float)))
                         iron)))
  ;; *indep-vars-1a-matrix*

  ;; and mathematically, they seem equal:
  (m= *indep-vars-1-matrix* *indep-vars-1a-matrix*) ; => T
  ;; but of course not completely (they are distinct objects)...
  (eql *indep-vars-1-matrix* *indep-vars-1a-matrix*) ; => NIL
  (eq *indep-vars-1-matrix* *indep-vars-1a-matrix*)  ; => NIL

  ;; and verify...
  (print *indep-vars-1-matrix*)
  (print *indep-vars-1a-matrix*)

  (documentation 'lisp-matrix:bind2 'function) ; by which we mean:
  (documentation 'bind2 'function)
  (bind2 *indep-vars-1-matrix* *indep-vars-1a-matrix* :by :column) ; 2 col
  (bind2 *indep-vars-1-matrix* *indep-vars-1a-matrix* :by :row) ; 1 long col

  ;; the weird way: two rows (iron, aluminum), then transpose.
  ;; NOTE: this value is replaced by the second definition of the same
  ;; variable just below.
  (defparameter *indep-vars-2-matrix*
    (transpose (make-matrix 2 (length iron)
                            :initial-contents
                            (list
                             (mapcar #'(lambda (x) (coerce x 'double-float))
                                     iron)
                             (mapcar #'(lambda (x) (coerce x 'double-float))
                                     aluminum)))))
  ;; *indep-vars-2-matrix*

  ;; the "right"? way: one (iron aluminum) pair per row.
  (defparameter *indep-vars-2-matrix*
    (make-matrix (length iron) 2
                 :initial-contents
                 (mapcar #'(lambda (x y)
                             (list (coerce x 'double-float)
                                   (coerce y 'double-float)))
                         iron aluminum)))
  ;; *indep-vars-2-matrix*

  ;; The below FAILS due to coercion issues; it just isn't lispy, it's R'y.
  ;; It is kept for reference but disabled with #+nil, so that evaluating
  ;; this PROGN as a whole is not aborted at this form.
  #+nil
  (defparameter *dep-var* (make-vector (length absorbtion)
                                       :initial-contents (list absorbtion)))

  ;; BUT below, this should be the right type.
  (defparameter *dep-var*
    (make-vector (length absorbtion)
                 :type :row
                 :initial-contents
                 (list
                  (mapcar #'(lambda (x) (coerce x 'double-float))
                          absorbtion))))
  ;; *dep-var*

  ;; Same data as a row vector, but with an INTEGER element type and
  ;; no coercion of the elements.
  (defparameter *dep-var-int*
    (make-vector (length absorbtion)
                 :type :row
                 :element-type 'integer
                 :initial-contents (list absorbtion)))

  (typep *dep-var* 'matrix-like) ; => T
  (typep *dep-var* 'vector-like) ; => T

  (typep *indep-vars-1-matrix* 'matrix-like) ; => T
  (typep *indep-vars-1-matrix* 'vector-like) ; => T
  (typep *indep-vars-2-matrix* 'matrix-like) ; => T
  (typep *indep-vars-2-matrix* 'vector-like) ; => NIL

  iron
  ;; following fails, need to ensure that we work on list elts, not just
  ;; elts within a list:
  ;; (coerce iron 'real)

  ;; the following is a general list-conversion coercion approach -- is
  ;; there a more efficient way?
  (coerce 1 'real)
  (mapcar #'(lambda (x) (coerce x 'double-float)) iron)

  (princ "Data Set up"))
#+nil
(progn
  ;; REVIEW: general Lisp use guidance

  ;; Look up the documentation string and the function object of
  ;; MAKE-MATRIX.
  (documentation 'make-matrix 'function)
  (fdefinition 'make-matrix)

  ;; The CLHS examples below are reference text only.  They live inside
  ;; a #| ... |# block comment, which must be terminated explicitly:
  ;; without the closing |# the reader would treat the rest of the file
  ;; as part of the comment.

  #| Examples from CLHS, a bit of guidance.

  ;; This function assumes its callers have checked the types of the
  ;; arguments, and authorizes the compiler to build in that assumption.
  (defun discriminant (a b c)
    (declare (number a b c))
    "Compute the discriminant for a quadratic equation."
    (- (* b b) (* 4 a c))) =>  DISCRIMINANT
  (discriminant 1 2/3 -2) =>  76/9

  ;; This function assumes its callers have not checked the types of the
  ;; arguments, and performs explicit type checks before making any assumptions.
  (defun careful-discriminant (a b c)
    "Compute the discriminant for a quadratic equation."
    (check-type a number)
    (check-type b number)
    (check-type c number)
    (locally (declare (number a b c))
      (- (* b b) (* 4 a c)))) =>  CAREFUL-DISCRIMINANT
  (careful-discriminant 1 2/3 -2) =>  76/9
  |#
  )
#+nil
(progn ;; FIXME: Regression modeling

  ;; Uses the IRON / ALUMINUM / ABSORBTION data referred to by the
  ;; earlier data-setup FIXME.
  (defparameter m nil
    "holding variable.")

  ;; The lists have to be turned into vectors and matrices before they
  ;; can be handed to REGRESSION-MODEL.

  ;; Model built from IRON and ABSORBTION, default keyword arguments.
  (def m (regression-model (list->vector-like iron)
                           (list->vector-like absorbtion)))

  ;; Same model, with :PRINT NIL.
  (def m (regression-model (list->vector-like iron)
                           (list->vector-like absorbtion)
                           :print nil))
  ;; Good

  ;; Poke at the resulting object through its message interface.
  (send m :print)
  (send m :own-slots)
  (send m :own-methods)
  ;; (lsos::ls-objects-methods m) ; bogus?
  (send m :show)

  ;; Rebuild the IRON / ABSORBTION model once more...
  (def m (regression-model (list->vector-like iron)
                           (list->vector-like absorbtion)))

  ;; ...and then a model with two predictor lists, IRON and ALUMINUM.
  (def m (regression-model (listoflists->matrix-like (list iron aluminum))
                           (list->vector-like absorbtion)
                           :print nil))

  (send m :compute)
  (send m :sweep-matrix)
  (format t "~%~A~%" (send m :sweep-matrix))

  ;; need to get multiple-linear regression working (simple linear regr
  ;; works)... to do this, we need to redo the whole numeric structure,
  ;; I'm keeping these in as example of brokenness...

  (send m :basis) ;; this should be positive?
  (send m :coef-estimates))
#+nil
(progn
  ;; FIXME: Need to clean up data examples, licenses, attributions, etc.
  ;;
  ;; The following breaks because we should use a package to hold
  ;; configuration details, and this would be the only package outside
  ;; of packages.lisp, as it holds the overall defsystem structure.
  (load-data "iris.lsp")                ; (the above partially fixed).
  (variables)
  diabetes)
236 #+nil
(progn

  ;; FIXME: Data.Frames probably deserve to be related to lists --
  ;; either lists of cases, or lists of variables.  We probably do not
  ;; want to mix them, but want to be able to convert between such
  ;; structures.

  ;; A two-entry list: (:cases <case> ...) and (:var-names <names>).
  ;; The whole structure is a quoted literal, so nothing inside it is
  ;; evaluated; the variable names are therefore written as a plain
  ;; list of strings rather than as a (list ...) call, which would have
  ;; stored the symbol LIST as the first "name".
  (defparameter *my-case-data*
    '((:cases
       (:case1 Y Med  3.4 5)
       (:case2 N Low  3.2 3)
       (:case3 Y High 3.1 4))
      (:var-names ("Response" "Level" "Pressure" "Size"))))

  *my-case-data*

  (elt *my-case-data* 1)                ; the (:var-names ...) entry
  (elt *my-case-data* 0)                ; the (:cases ...) entry
  ;; Index 2 is out of range for a two-element list and signals an
  ;; error; kept as an illustration but disabled so that evaluating the
  ;; PROGN as a whole is not aborted here.
  #+nil
  (elt *my-case-data* 2) ;; error
  (elt (elt *my-case-data* 0) 1)        ; first case
  (elt (elt *my-case-data* 0) 0)        ; the :cases tag
  (elt (elt (elt *my-case-data* 0) 1) 0) ; case label of the first case
  (elt (elt (elt *my-case-data* 0) 1) 1) ; first value of the first case
  (elt (elt (elt *my-case-data* 0) 1) 2) ; second value of the first case
  (elt (elt *my-case-data* 0) 3))       ; third case
#+nil
(progn ;; FIXME: read data from CSV file.  To do.

  ;; Two reader defects are fixed here: a stray ")" directly after the
  ;; #+nil guard (a reader error, since #+nil must be followed by a
  ;; complete form) and the missing ")" that closes this PROGN.

  ;; challenge is to ensure that we get mixed arrays when we want them,
  ;; and single-type (simple) arrays in other cases.

  ;; NOTE(review): this reads "example-num.csv" while the calls further
  ;; down read "example-numeric.csv" -- confirm which file name is
  ;; intended.
  (defparameter *csv-num* (read-csv "Data/example-num.csv" :type 'numeric))
  (defparameter *csv-mix* (read-csv "Data/example-mixed.csv" :type 'data))

  ;; The handling of these types should be comparable to what we do for
  ;; matrices, but without the numerical processing.  i.e. mref, bind2,
  ;; make-dataframe, and the class structure should be similar.

  ;; With numerical data, there should be a straightforward mapping from
  ;; the data.frame to a matrix.  With categorical data (including
  ;; dense categories such as doc-strings, as well as sparse categories
  ;; such as binary data), we need to include metadata about ordering,
  ;; coding, and such.  So the structures should probably consider ...
  ;; (FIXME: this thought was left unfinished.)

  ;; Using the CSV file:

  (asdf:oos 'asdf:compile-op 'csv :force t)
  (asdf:oos 'asdf:load-op 'parse-number)
  (asdf:oos 'asdf:load-op 'csv)
  (fare-csv:read-csv-file "Data/example-numeric.csv")

  ;; but I think the cl-csv package is broken, need to use the dsv-style
  ;; package.

  ;; now we've got the DSV code in the codebase, auto-loaded I hope:
  cybertiggyr-dsv:*field-separator*
  (defparameter *example-numeric.csv*
    (cybertiggyr-dsv:load-escaped "Data/example-numeric.csv"
                                  :field-separator #\,))
  *example-numeric.csv*

  ;; the following fails because we've got a bit of string conversion
  ;; to do.  2 thoughts: #1 modify dsv package, but mucking with
  ;; encapsulation.  #2 add a coercion tool (better, but potentially
  ;; inefficient).
  #+nil (coerce (nth 3 (nth 3 *example-numeric.csv*)) 'double-float)

  ;; cases, simple to not so: bare number, leading whitespace, and
  ;; whitespace on both sides.
  (defparameter *test-string1* "1.2")
  (defparameter *test-string2* " 1.2")
  (defparameter *test-string3* " 1.2 ")
  )
#+nil
(progn ;; experiments with GSL and the Lisp interface.
  (asdf:oos 'asdf:load-op 'gsll)
  (asdf:oos 'asdf:load-op 'gsll-tests)

  ;; the following should be equivalent
  ;;
  ;; DEFPARAMETER is used instead of SETF so the two scratch variables
  ;; are properly declared before being assigned.
  (defparameter *t1* (list 6.18d0 6.647777777777779d0 6.18d0))
  (defparameter *t2*
    (multiple-value-list
     (let ((vec
             (gsll:make-marray 'double-float
                               :initial-contents '(-3.21d0 1.0d0 12.8d0)))
           (weights
             (gsll:make-marray 'double-float
                               :initial-contents '(3.0d0 1.0d0 2.0d0))))
       (let ((mean (gsll:mean vec)))
         (list (gsll:absolute-deviation vec)
               (gsll:weighted-absolute-deviation vec weights)
               (gsll:absolute-deviation vec mean))))))
  ;; EQL on two separately consed lists is always NIL, and *T2* holds
  ;; the result list wrapped once more by MULTIPLE-VALUE-LIST.  Compare
  ;; structurally against the first (and only) value instead.
  ;; NOTE(review): EQUAL compares the doubles exactly; a tolerance-based
  ;; comparison may be needed if the computed values differ in the last
  ;; bits.
  (equal *t1* (first *t2*))

  ;; from (gsll:examples 'gsll::numerical-integration) ...
  (gsll:integration-qng gsll::one-sine 0.0d0 pi)

  (defun-single axpb (x) (+ (* 2 x) 3)) ;; a<-2, b<-3
  (gsll:integration-qng axpb 1d0 2d0)

  ;; Same integrand, with the coefficients closed over.
  (let ((a 2)
        (b 3))
    (defun-single axpb2 (x) (+ (* a x) b)))
  (gsll:integration-qng axpb2 1d0 2d0)

  ;; The BAD variant is kept as text only, inside a block comment that
  ;; is now explicitly terminated.
  #| BAD
  (gsll:integration-qng
   (let ((a 2)
         (b 3))
     (defun-single axpb2 (x) (+ (* a x) b)))
   1d0 2d0)
  |#

  ;; right, but weird expansion...
  (gsll:integration-qng
   (let ((a 2)
         (b 3))
     (defun axpb2 (x) (+ (* a x) b))
     (def-single-function axpb2)
     axpb2)
   1d0 2d0)

  ;; Linear least squares

  (gsll:gsl-lookup "gsl_linalg_LU_decomp") ; => gsll:lu-decomposition
  (gsll:gsl-lookup "gsl_linalg_LU_solve")  ; => gsll:lu-solve
  )
#+nil
(progn ;; philosophy time

  ;; Design sketch only: MODEL, STATISTICAL-TABLE, ANALYSIS, ANALYZE and
  ;; ESTIMATION-METHOD-DEFINITION are not defined in this section, and
  ;; the whole PROGN is disabled by the #+nil guard.

  ;; A model: named data slots, parameter slots and a mathematical form.
  (setf my-model (model :name "ex1"
                        :data-slots (list x y z)
                        :param-slots (list alpha beta gamma)
                        :math-form (regression-model
                                    :formula '(= y (+ (* beta x)
                                                      (* alpha y)
                                                      (* gamma z)
                                                      normal-error)))))

  ;; A dataset: the table contents plus metadata describing them.
  (setf my-dataset (statistical-table
                    :table data-frame-contents
                    :metadata (list (:case-names (list ))
                                    (:var-names (list ))
                                    (:documentation "string of doc"))))

  ;; An analysis: a model, a dataset, and the pairing between the
  ;; model's parameter slots and the dataset's variable names.
  (setf my-analysis (analysis
                     :model my-model
                     :data my-dataset
                     :parameter-map (pairing (model-param-slots my-model)
                                             (data-var-names my-dataset))))

  ;; ontological implications -- the analysis is an abstract class of
  ;; data, model, and mapping between the model and data.  The fit is
  ;; the instantiation of such.  This provides a statistical object
  ;; computation theory which can be realized as "executable
  ;; statistics" or "computable statistics".
  ;; NOTE(review): MY-FIT is never assigned in this section and the
  ;; result overwrites MY-ANALYSIS; possibly the two names are swapped
  ;; -- confirm the intent.
  (setf my-analysis (analyze my-fit
                             :estimation-method 'linear-least-squares-regression))

  ;; one of the tricks here is that one needs to provide the structure
  ;; from which to consider estimation, and more importantly, the
  ;; validity of the estimation.

  ;; The keyword was misspelled ":variable-defintions"; corrected here.
  ;; NOTE(review): the doubled parentheses around (list ...) are kept
  ;; as written; they only make sense if ESTIMATION-METHOD-DEFINITION
  ;; is a macro taking a list of definitions -- confirm.
  (setf linear-least-squares-regression
        (estimation-method-definition
         :variable-definitions ((list
                                 ;; from MachLearn: supervised,
                                 ;; unsupervised
                                 :data-response-vars list-drv ; nil if unsup

                                 :param-vars list-pv
                                 :data-predictor-vars list-dpv
                                 ;; nil in this case.  these
                                 ;; describe "out-of-box" specs
                                 :hyper-vars list-hv))
         :form '(regression-additive-error
                 :central-form (linear-form drv pv dpv)
                 :error-form 'normal-error)
         :resulting-decision '(point-estimation interval-estimation)
         :philosophy 'frequentist
         :documentation "use least squares to fit a linear regression
model to data."))

  (defparameter *statistical-philosophies*
    '(frequentist bayesian fiducial decision-analysis)
    "can be combined to build decision-making approaches and
characterizations")

  (defparameter *decisions*
    '(estimation selection testing)
    "possible results from a...")
  ;; is this really true?  One can embed hypothesis testing within
  ;; estimation, as the hypothesis estimated to select.  And
  ;; categorical/continuous rear their ugly heads, but not really in
  ;; an essential way.

  ;; Nested keyword/list tree.  Two closing parentheses after the
  ;; :interval list were missing; they are restored so that the form is
  ;; balanced and the documentation string lands in the DEFPARAMETER
  ;; documentation position.  The existing nesting (:selection inside
  ;; the :testing list) is kept as written.
  (defparameter *ontology-of-decision-procedures*
    (list :decisions
          (list :estimation
                (list :point
                      (list :maximum-likelihood
                            :minimum-entropy
                            :least-squares
                            :method-of-moments)
                      :interval
                      (list :maximum-likelihood))
                :testing
                (list :fisherian
                      :neyman-pearson
                      (list :traditional
                            :bioequivalence-inversion)
                      :selection
                      (list :ranking
                            :top-k-of-n-select))
                :parametric
                :partially-parametric))
    "start of ontology"))