ls-demo.lisp

   1 ;;; -*- mode: lisp -*-
   2 ;;; Copyright (c) 2006-2008, by A.J. Rossini <blindglobe@gmail.com>
   3 ;;; See COPYRIGHT file for any additional restrictions (BSD license).
   4 ;;; Since 1991, ANSI was finally finished.  Edited for ANSI Common Lisp.
   5
   6 ;;; Time-stamp: <2009-08-18 08:07:56 tony>
   7 ;;; Creation:   sometime in 2006...
   8 ;;; File:       ls-demo.lisp
   9 ;;; Author:     AJ Rossini <blindglobe@gmail.com>
  10 ;;; Copyright:  (c) 2007, AJ Rossini.  BSD.
  11 ;;; Purpose:    demonstrations of how one might use CLSv2.
  12
  13 ;;; What is this talk of 'release'? Klingons do not make software
  14 ;;; 'releases'.  Our software 'escapes', leaving a bloody trail of
  15 ;;; designers and quality assurance people in its wake.
  16
  17 (in-package :cl-user)
  18
  19
  20 ;; (asdf:oos 'asdf:compile-op 'cls :force t)
  21 (asdf:oos 'asdf:load-op 'cls)
  22
  23 (in-package :ls-user)
  24
  25 ;; a bit of infrastructure for beginners
  26 (defparameter *my-cls-homedir*
  27   "/media/disk/Desktop/sandbox/CLS.git/")
  28 (concatenate 'string *my-cls-homedir* "Data/example.csv")
  29 ;; implies
  30 (defun localized-pathto (x)
       "Return the string *MY-CLS-HOMEDIR* followed by X; X must be a string."
  31   (check-type x string)
       ;; Plain concatenation: no separator is inserted between the two parts.
  32   (let ((root *my-cls-homedir*))
         (concatenate 'string root x)))
  33
  34 ;;; == READ DATA
  35
  36 (defparameter *my-df-1*
  37   (make-instance 'dataframe-array
  38                  :storage #2A((1 2 3 4 5)
  39                               (10 20 30 40 50))
  40                  :doc "This is an un-interesting dataframe-array"
  41                  :case-labels (list "x" "y")
  42                  :var-labels (list "a" "b" "c" "d" "e")))
  43
  44 (setf (xref *my-df-1* 0 0) -1d0)
  45 ;; *my-df-1*
  46
  47
  48 (make-dataframe  #2A((1 2 3 4 5)
  49                      (10 20 30 40 50)))
  50
  51 (make-dataframe (rand 4 3))
  52
  53
  54
  55
  56 (defparameter *my-df-2*
  57   (make-dataframe  #2A((1 2 3 4 5)
  58                        (10 20 30 40 50))
  59                    :caselabels (list "x" "y")
  60                    :varlabels (list "a" "b" "c" "d" "e")
  61                    :doc "This is another boring dataframe-array"))
  62
  63 (caselabels *my-df-1*)
  64 (varlabels *my-df-1*)
  65
  66
  67
  68 (defparameter *my-df-2*
  69   (make-dataframe  #2A((a 2    T  4  5)
  70                        (b 20 nil 40 50))
  71                    :caselabels (list "x" "y")
  72                    :varlabels (list "a" "b" "c" "d" "e")
  73                    :doc "This is another boring dataframe-array"))
  74
  75 ;; *my-df-2*
  76
  77
  78 ;;; HERE#1
  79
  80 ;;; read in a CSV dataframe...
  81
  82
  83 ;; a better approach is:
  84 (asdf:oos 'asdf:load-op 'rsm-string)
  85 (rsm.string:file->string-table
  86  (localized-pathto "Data/example-mixed.csv")
  87  :delims ",")
  88
  89 (rsm.string:file->number-table
  90  (localized-pathto "Data/example-numeric.csv")
  91  :delims ",")
  92
  93 (rsm.string:file->number-table
  94  (localized-pathto "Data/R-chickwts.csv")
  95  :delims ",")
  96 (rsm.string:file->string-table
  97  (localized-pathto "Data/R-chickwts.csv")
  98  :delims ",")
  99
 100 (defparameter *my-df-2*
 101   (make-instance 'dataframe-array
 102                  :storage
 103                  (listoflist->array
 104                    (rsm.string:file->string-table
 105                     (localized-pathto "Data/example-mixed.csv")))
 106                  :doc "This is an interesting dataframe-array"))
 107 ;; *my-df-2*
 108
 109 (defparameter *my-df-3*
 110   (make-instance 'dataframe-array
 111                  :storage
 112                  (listoflist->array
 113                   (transpose-listoflist
 114                    (rsm.string:file->number-table
 115                     (localized-pathto "Data/example-numeric.csv"))))
 116                  :doc "This is an interesting dataframe-array"))
 117 ;; *my-df-3*
 118
 119
 120 (defparameter *my-df-4*
 121   (make-instance 'dataframe-array
 122                  :storage
 123                  (listoflist->array
 124                    (rsm.string:file->number-table
 125                     (localized-pathto "Data/R-chickwts.csv")
 126                     :delims ","))
 127                  :doc "This is an interesting dataframe-array that currently fails"))
 128 ;; *my-df-4*
 129
 130
 131 (defparameter *my-df-5*
 132   (make-instance 'dataframe-array
 133                  :storage
 134                  (listoflist->array
 135                   (transpose-listoflist
 136                    (rsm.string:file->number-table
 137                     (localized-pathto "Data/R-swiss.csv"))))
 138                  :doc "This is an interesting dataframe-array that currently fails"))
 139 ;; *my-df-5*
 140
 141
 142 (defparameter *mat-1*
 143   (make-matrix 3 3
 144                :initial-contents #2A((2d0 3d0 4d0) (3d0 2d0 4d0) (4d0 4d0 5d0))))
 145
 146 (defparameter *mat-1*
       ;; Rebinds *MAT-1* (already defined just above): same 3x3 values,
       ;; but with the third column negated.
 147   (make-matrix 3 3
 148      :initial-contents #2A((2d0 3d0 -4d0)
 149                            (3d0 2d0 -4d0)
 150                            (4d0 4d0 -5d0))))
 151 (mref *mat-1* 2 0)
 152
 153 (defparameter *mat-2*
       ;; M multiplied by its own transpose, where M comes from (rand 3 3)
       ;; -- presumably a random 3x3 matrix; confirm against `rand'.
       ;; The potrf / potri / minv-cholesky calls below operate on copies of it.
 154   (let ((m (rand 3 3)))
 155     (m* m (transpose m))))
 156
 157 (axpy 100.0d0 *mat-2* (eye 3 3))
 158
 159 (potrf (copy *mat-2*)) ;; factor
 160 (potri (copy *mat-2*)) ;; invert
 161 (minv-cholesky (copy *mat-2*))
 162 (m*  (minv-cholesky (copy *mat-2*)) *mat-2*)
 163
 164 (defparameter *mat-3*
       ;; Fixed symmetric 3x3 matrix; the block comment below compares its
       ;; POTRF result with R's chol() on the same values.
 165   (make-matrix
 166    3 3
 167    :initial-contents '((16d0 13d0 12d0)
 168                        (13d0 22d0 7d0)
 169                        (12d0 7d0  17d0))))
 170
 171 (potrf (copy *mat-3*)) ;; factor
 172
 173 #|
 174  *mat-3* =>
 175  #<LA-SIMPLE-MATRIX-DOUBLE  3 x 3
 176   16.0 13.0 12.0
 177   13.0 22.0 7.0
 178   12.0 7.0 17.0>
 179
 180  (potrf (copy *mat-3*)) =>
 181  (#<LA-SIMPLE-MATRIX-DOUBLE  3 x 3
 182   4.0 3.25 3.0
 183   13.0 3.3819373146171707 -0.8131433980500301
 184   12.0 7.0 2.7090215603069034>
 185   "U" NIL)
 186
 187  ;; and compare with...
 188
 189  > testm <- matrix(data=c(16,13,12,13,22,7,12,7,17),nrow=3)
 190  > chol(testm)
 191       [,1]     [,2]       [,3]
 192  [1,]    4 3.250000  3.0000000
 193  [2,]    0 3.381937 -0.8131434
 194  [3,]    0 0.000000  2.7090216
 195  >
 196
 197  ;; which suggests that the major difference is that R zeros out the
 198  ;; below-diagonal terms, and that CLS does not.
 199
 200 |#
 201
 202 (potri (copy *mat-2*)) ;; invert
 203 (minv-cholesky (copy *mat-2*))
 204 (m*  (minv-cholesky (copy *mat-2*)) *mat-2*)
 205
 206
 207 #|
 208  (lu-decomp  #2A((2 3 4) (1 2 4) (2 4 5)))
 209  ;; => (#2A((2.0 3.0 4.0) (1.0 1.0 1.0) (0.5 0.5 1.5)) #(0 2 2) -1.0 NIL)
 210  (lu-solve
 211   (lu-decomp #2A((2 3 4) (1 2 4) (2 4 5)))
 212   #(2 3 4))
 213  ;; => #(-2.333333333333333 1.3333333333333335 0.6666666666666666)
 214 |#
 215 (getrf
 216  (make-matrix 3 3
 217               :initial-contents #2A((2d0 3d0 4d0) (1d0 2d0 4d0) (2d0 4d0 5d0))))
 218
 219 #| => ; matrix matches; the pivot vector (1 3 3) is the #(0 2 2) from lu-decomp above, shifted to 1-based indices.
 220  (#<LA-SIMPLE-MATRIX-DOUBLE  3 x 3
 221   2.0 3.0 4.0
 222   1.0 1.0 1.0
 223   0.5 0.5 1.5>
 224   #<FNV-INT32 (3) 1 3 3> NIL)
 225 |#
 226
 227 (msolve-lu ; NOTE: MSOLVE-LU is defined further down ("LU common applications"); evaluate that DEFUN first.
      ;; Solves A x = b for the 3x3 matrix A and the 3x1 column vector b
      ;; given below; the expected result is shown in the block comment
      ;; that follows.
 228  (make-matrix 3 3
 229               :initial-contents #2A((2d0 3d0 4d0)
 230                                     (1d0 2d0 4d0)
 231                                     (2d0 4d0 5d0)))
 232  (make-vector 3 :type :column
 233               :initial-contents '((2d0)
 234                                   (3d0)
 235                                   (4d0))))
 236
 237 #| =>
 238   #<LA-SIMPLE-VECTOR-DOUBLE (3 x 1)
 239    -2.3333333333333335
 240    1.3333333333333335
 241    0.6666666666666666>
 242 |#
 243
 244
 245
 246 ;;; LU common applications
 247
 248 (defun minv-lu (a)
 249   "Return the inverse of A, computed from an LU factorization of a copy of A."
       ;; GETRF is handed a copy of A; the first two elements of the list it
       ;; returns are passed on to GETRI, whose first result is the answer.
 250   (let* ((factorization (getrf (copy a)))
              (lu-part (first factorization))
              (pivot-part (second factorization)))
 251     (first (getri lu-part pivot-part))))
 252
 253 #+nil (progn
 254         (let ((m1 (rand 3 3)))
 255           (m* m1 (minv-lu m1))))
 256
 257 (defun msolve-lu (a b)
 258   "Return `x' solving `A x = b', using an LU factorization of a copy of A."
       ;; GETRF is handed a copy of A; the first two elements of the list it
       ;; returns are passed to GETRS around B, and GETRS's first result is
       ;; the solution.
 259   (let* ((factorization (getrf (copy a)))
              (lu-part (first factorization))
              (pivot-part (second factorization)))
 260     (first (getrs lu-part b pivot-part))))
 261
 262
 263
 264 ;; (inverse #2A((2 3 4) (1 2 4) (2 4 5)))
 265 ;; #2A((2.0 -0.33333333333333326 -1.3333333333333335)
 266 ;;     (-1.0 -0.6666666666666666 1.3333333333333333)
 267 ;;     (0.0 0.6666666666666666 -0.3333333333333333))
 268
 269 (minv-lu
 270    (make-matrix
 271     3 3
 272     :initial-contents #2A((2d0 3d0 4d0)
 273                           (1d0 2d0 4d0)
 274                           (2d0 4d0 5d0))))
 275
 276 #|
 277
 278  #<LA-SIMPLE-MATRIX-DOUBLE  3 x 3
 279   2.0 -0.3333333333333333 -1.3333333333333333
 280   -1.0 -0.6666666666666666 1.3333333333333333
 281   0.0 0.6666666666666666 -0.3333333333333333>
 282
 283  ;; so is correct.
 284
 285 |#
 286
 287 ;;;;;HERE#2
 288
 289 (factorize
 290  (make-matrix 3 3
 291               :initial-contents #2A((2d0 3d0 4d0)
 292                                     (1d0 2d0 4d0)
 293                                     (2d0 4d0 5d0)))
 294  :by :svd)
 295
 296 ;; (sv-decomp  #2A((2 3 4) (1 2 4) (2 4 5)))
 297 ;; (#2A((-0.5536537653489974 0.34181191712789266 -0.7593629708013371)
 298 ;;      (-0.4653437312661058 -0.8832095891230851 -0.05827549615722014)
 299 ;;      (-0.6905959164998124 0.3211003503429828 0.6480523475178517))
 300 ;;  #(9.699290438141343 0.8971681569301373 0.3447525123483081)
 301 ;;  #2A((-0.30454218417339873 0.49334669582252344 -0.8147779426198863)
 302 ;;      (-0.5520024849987308 0.6057035911404464 0.5730762743603965)
 303 ;;      (-0.7762392122368734 -0.6242853493399995 -0.08786630745236332))
 304 ;;  T)
 305
 306 ()
 307
 308 (qr-decomp  #2A((2 3 4) (1 2 4) (2 4 5)))
 309 ;; (#2A((-0.6666666666666665 0.7453559924999298 5.551115123125783e-17)
 310 ;;      (-0.3333333333333333 -0.2981423969999719 -0.894427190999916)
 311 ;;      (-0.6666666666666666 -0.5962847939999439 0.44721359549995787))
 312 ;;  #2A((-3.0 -5.333333333333334 -7.333333333333332)
 313 ;;      (0.0 -0.7453559924999292 -1.1925695879998877)
 314 ;;      (0.0 0.0 -1.3416407864998738)))
 315
 316 (rcondest #2A((2 3 4) (1 2 4) (2 4 5)))
 317 ;; 6.8157451e7
 318 ;;; CURRENTLY FAILS!!
 319
 320 (eigen #2A((2 3 4) (1 2 4) (2 4 5)))
 321 ;; (#(10.656854249492381 -0.6568542494923802 -0.9999999999999996)
 322 ;;  (#(0.4999999999999998 0.4999999999999997 0.7071067811865475)
 323 ;;   #(-0.49999999999999856 -0.5000000000000011 0.7071067811865474)
 324 ;;   #(0.7071067811865483 -0.7071067811865466 -1.2560739669470215e-15))
 325 ;;  NIL)
 326
 327 (spline #(1.0 1.2 1.3 1.8 2.1 2.5)
 328         #(1.2 2.0 2.1 2.0 1.1 2.8) :xvals 6)
 329 ;; ((1.0 1.3 1.6 1.9 2.2 2.5)
 330 ;;  (1.2 2.1 2.2750696543866313 1.6465231041904045 1.2186576148879609 2.8))
 331
 332 ;;; using KERNEL-SMOOTH-FRONT, not KERNEL-SMOOTH-CPORT
 333 (kernel-smooth #(1.0 1.2 1.3 1.8 2.1 2.5)
 334                #(1.2 2.0 2.1 2.0 1.1 2.8) :xvals 5)
 335 ;; ((1.0 1.375 1.75 2.125 2.5)
 336 ;;  (1.6603277642110226 1.9471748095239771 1.7938127405752287
 337 ;;   1.5871511322219498 2.518194783156392))
 338
 339 (kernel-dens #(1.0 1.2 2.5 2.1 1.8 1.2) :xvals 5)
 340 ;; ((1.0 1.375 1.75 2.125 2.5)
 341 ;;  (0.7224150453621405 0.5820045548233707 0.38216411702854214
 342 ;;   0.4829822708587095 0.3485939156929503))
 343
 344 (fft #(1.0 1.2 2.5 2.1 1.8))
 345 ;; #(#C(1.0 0.0) #C(1.2 0.0) #C(2.5 0.0) #C(2.1 0.0) #C(1.8 0.0))
 346
 347 (lowess #(1.0 1.2 2.5 2.1 1.8 1.2)  #(1.2 2.0 2.1 2.0 1.1 2.8))
 348 ;; (#(1.0 1.2 1.2 1.8 2.1 2.5))
 349
 350
 351
 352 ;;;; Special functions
 353
 354 ;; Log-gamma function
 355
 356 (log-gamma 3.4) ;;1.0923280596789584
 357
 358
 359
 360 ;;;; Probability functions
 361
 362 ;; looking at these a bit more, perhaps a more CLOSy style is needed, i.e.
 363 ;; (quantile :list-or-cons loc :type type (one of 'empirical 'normal 'cauchy, etc...))
 364 ;; similar for the cdf, density, and rand.
 365 ;; Probably worth figuring out how to add a new distribution
 366 ;; efficiently, i.e. by keeping some kind of list.
 367
 368 ;; Normal distribution
 369
 370 (normal-quant 0.95) ;;1.6448536279366268
 371 (normal-cdf 1.3) ;;0.9031995154143897
 372 (normal-dens 1.3) ;;0.17136859204780736
 373 (normal-rand 2) ;;(-0.40502015f0 -0.8091404f0)
 374
 375 (bivnorm-cdf 0.2 0.4 0.6) ;;0.4736873734160288
 376
 377 ;; Cauchy distribution
 378
 379 (cauchy-quant 0.95) ;;6.313751514675031
 380 (cauchy-cdf 1.3) ;;0.7912855998398473
 381 (cauchy-dens 1.3) ;;0.1183308127104695
 382 (cauchy-rand 2) ;;(-1.06224644160405 -0.4524695943939537)
 383
 384 ;; Gamma distribution
 385
 386 (gamma-quant 0.95 4.3) ;;8.178692439291645
 387 (gamma-cdf 1.3 4.3) ;;0.028895150986674906
 388 (gamma-dens 1.3 4.3) ;;0.0731517686447374
 389 (gamma-rand 2 4.3) ;;(2.454918912880936 4.081365384357454)
 390
 391 ;; Chi-square distribution
 392
 393 (chisq-quant 0.95 3) ;;7.814727903379012
 394 (chisq-cdf 1 5) ;;0.03743422675631789
 395 (chisq-dens 1 5) ;;0.08065690818083521
 396 (chisq-rand 2 4) ;;(1.968535826180572 2.9988646156942997)
 397
 398 ;; Beta distribution
 399
 400 (beta-quant 0.95 3 2) ;;0.9023885371149876
 401 (beta-cdf 0.4 2 2.4) ;;0.4247997418541529
 402 (beta-dens 0.4 2 2.4) ;;1.5964741858913518
 403 (beta-rand 2 2 2.4) ;;(0.8014897077282279 0.6516371997922659)
 404
 405 ;; t distribution
 406
 407 (t-quant 0.95 3) ;;2.35336343484194
 408 (t-cdf 1 2.3) ;;0.794733624298342
 409 (t-dens 1 2.3) ;;0.1978163816318102
 410 (t-rand 2 2.3) ;;(-0.34303672776089306 -1.142505872436518)
 411
 412 ;; F distribution
 413
 414 (f-quant 0.95 3 5) ;;5.409451318117459
 415 (f-cdf 1 3.2 5.4) ;;0.5347130905510765
 416 (f-dens 1 3.2 5.4) ;;0.37551128864591415
 417 (f-rand 2 3 2) ;;(0.7939093442091963 0.07442694152491144)
 418
 419 ;; Poisson distribution
 420
 421 (poisson-quant 0.95 3.2) ;;6
 422 (poisson-cdf 1 3.2) ;;0.17120125672252395
 423 (poisson-pmf 1 3.2) ;;0.13043905274097067
 424 (poisson-rand 5 3.2) ;;(2 1 2 0 3)
 425
 426 ;; Binomial distribution
 427
 428 (binomial-quant 0.95 3 0.4) ;;; DOESN'T RETURN
 429 (binomial-quant 0 3 0.4) ;;; -2147483648
 430 (binomial-cdf 1 3 0.4) ;;0.6479999999965776
 431 (binomial-pmf 1 3 0.4) ;;0.4320000000226171
 432 (binomial-rand 5 3 0.4) ;;(2 2 0 1 2)
 433
 434 ;;;; OBJECT SYSTEM
 435
 436 (in-package :ls-user)
 437 (defproto *test-proto*)
 438 *test-proto*
 439 (defmeth *test-proto* :make-data (&rest args) nil)
 440
 441 (defparameter my-proto-instance nil)
 442 (setf my-proto-instance (send *test-proto* :new))
 443 (send *test-proto* :own-slots)
 444 (lsos::ls-object-slots *test-proto*)
 445 (lsos::ls-object-methods *test-proto*)
 446 (lsos::ls-object-parents *test-proto*)
 447 (lsos::ls-object-preclist *test-proto*)
 448 ;;; The following fail and I do not know why?
 449 (send *test-proto* :has-slot 'proto-name)
 450 (send *test-proto* :has-slot 'PROTO-NAME)
 451 (send *test-proto* :has-slot 'make-data)
 452 (send *test-proto* :has-slot 'MAKE-DATA)
 453 (send *test-proto* :has-method 'make-data)
 454 (send *test-proto* :has-method 'MAKE-DATA)
 455
 456
 457 (defproto2 *test-proto3* (list) (list) (list) "test doc" t) ; DEFPROTO2 with every argument spelled out
 458 (defproto2 *test-proto4*) ; DEFPROTO2 with only the name
 459 *test-proto2* ; NOTE(review): no *test-proto2* is defined in the visible file (only *test-proto3* and *test-proto4* above) -- confirm which was meant.
 460 (defmeth *test-proto* :make-data (&rest args) nil)
 461
 462 (defparameter my-proto-instance nil)
 463 (setf my-proto-instance (send *test-proto* :new))
 464 (send *test-proto* :own-slots)
 465 (send *test-proto* :has-slot 'proto-name)
 466 (send *test-proto* :has-slot 'PROTO-NAME)
 467
 468
 469 ;;;; Testing
 470
 471 (in-package :lisp-stat-unittests)
 472 (testsuites)
 473 (print-tests)
 474 (run-tests)
 475 (last-test-status)
 476 ;;(failures)
 477
 478 (describe (run-tests :suite 'lisp-stat-ut-testsupport))
 479 (describe (run-tests :suite 'lisp-stat-ut-testsupport2))
 480
 481 (testsuite-tests 'lisp-stat-ut)
 482 (run-tests :suite 'lisp-stat-ut)
 483 (describe (run-tests :suite 'lisp-stat-ut))
 484
 485 (run-tests :suite 'lisp-stat-ut-probdistn)
 486 (describe (run-tests :suite 'lisp-stat-ut-probdistn))
 487 (run-tests :suite 'lisp-stat-ut-spec-fns)
 488 (describe (run-tests :suite 'lisp-stat-ut-spec-fns))
 489
 490 (find-testsuite 'lisp-stat-ut-lin-alg)
 491 (testsuite-tests 'lisp-stat-ut-lin-alg)
 492 (run-tests :suite 'lisp-stat-ut-lin-alg)
 493 (describe (run-tests :suite 'lisp-stat-ut-lin-alg))
 494
 495 ;;;; Data Analysis test
 496
 497 (in-package :ls-user)
 498
 499 ;; LispStat 1 approach to variables
 500
 501 (progn
 502   (def iron  (list 61 175 111 124 130 173 169 169 160 224 257 333 199))
 503   iron
 504   (def aluminum (list 13 21 24 23 64 38 33 61 39 71 112 88 54))
 505   aluminum
 506   (def absorbtion (list 4 18 14 18 26 26 21 30 28 36 65 62 40))
 507   absorbtion
 508
 509   ;; LispStat 1 approach to data frames... (list of lists).
 510
 511   (DEF DIABETES
 512       (QUOTE ((80 97 105 90 90 86 100 85 97 97 91 87 78 90 86 80 90 99 85 90 90 88 95 90 92 74 98 100 86 98 70 99 75 90 85 99 100 78 106 98 102 90 94 80 93 86 85 96 88 87 94 93 86 86 96 86 89 83 98 100 110 88 100 80 89 91 96 95 82 84 90 100 86 93 107 112 94 93 93 90 99 93 85 89 96 111 107 114 101 108 112 105 103 99 102 110 102 96 95 112 110 92 104 75 92 92 92 93 112 88 114 103 300 303 125 280 216 190 151 303 173 203 195 140 151 275 260 149 233 146 124 213 330 123 130 120 138 188 339 265 353 180 213 328 346)
 513               (356 289 319 356 323 381 350 301 379 296 353 306 290 371 312 393 364 359 296 345 378 304 347 327 386 365 365 352 325 321 360 336 352 353 373 376 367 335 396 277 378 360 291 269 318 328 334 356 291 360 313 306 319 349 332 323 323 351 478 398 426 439 429 333 472 436 418 391 390 416 413 385 393 376 403 414 426 364 391 356 398 393 425 318 465 558 503 540 469 486 568 527 537 466 599 477 472 456 517 503 522 476 472 455 442 541 580 472 562 423 643 533 1468 1487 714 1470 1113 972 854 1364 832 967 920 613 857 1373 1133 849 1183 847 538 1001 1520 557 670 636 741 958 1354 1263 1428 923 1025 1246 1568)
 514               (124 117 143 199 240 157 221 186 142 131 221 178 136 200 208 202 152 185 116 123 136 134 184 192 279 228 145 172 179 222 134 143 169 263 174 134 182 241 128 222 165 282 94 121 73 106 118 112 157 292 200 220 144 109 151 158 73 81 151 122 117 208 201 131 162 148 130 137 375 146 344 192 115 195 267 281 213 156 221 199 76 490 143 73 237 748 320 188 607 297 232 480 622 287 266 124 297 326 564 408 325 433 180 392 109 313 132 285 139 212 155 120 28 23 232 54 81 87 76 42 102 138 160 131 145 45 118 159 73 103 460 42 13 130 44 314 219 100 10 83 41 77 29 124 15)
 515               (3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 2 3 3 2 2 3 2 2 3 3 3 3 2 3 3 3 3 3 2 3 3 3 3 3 2 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1))))
 516
 517
 518   (DEF DLABS (QUOTE ("GLUFAST" "GLUTEST" "INSTEST" "CCLASS")))
 519   (format t "loaded data.~%")
 520   )  ;; eval at this point.
 521
 522 ;; Simple univariate variable-specific descriptions.
 523 (fivnum absorbtion)
 524 (median absorbtion)
 525 (sort-data absorbtion)
 526 (rank absorbtion)
 527 (standard-deviation absorbtion)
 528 (interquartile-range absorbtion)
 529
 530 (lisp-stat-matrix::bind-columns aluminum iron)
 531 (bind-columns aluminum iron)
 532 (apply #'bind-columns (list aluminum iron))
 533 (lisp-stat-matrix::bind-columns  #2a((1 2)(3 4)) #(5 6))
 534 (bind-columns #2a((1 2)(3 4)) #(5 6))
 535
 536
 537 (defparameter fit1 nil)
 538 (setf fit1 (regression-model absorbtion iron))
 539 (send fit1 :display)
 540 (send fit1 :residuals)
 541
 542 iron
 543 (defparameter fit1a nil)
 544 (setf fit1a (regression-model absorbtion iron :print nil))
 545 (send fit1a :doc)
 546 ;;  (setf (send fit1a :doc) "this") ;; FIXME: this error...
 547 (send fit1a :doc "this") ;; FIXME: this is a more natural
 548 (send fit1a :doc)
 549 (send fit1a :x)
 550 (send fit1a :y)
 551 (send fit1a :compute)
 552 (send fit1a :sweep-matrix)
 553 (send fit1a :basis)
 554 (send fit1a :residuals)
 555 (send fit1a :display)
 556
 557 #+nil(progn
 558        ;; syntax example
 559        (array-dimension #2A ((1)) 0)
 560        )
 561
 562 ;;; FIXME: need to get multiple-linear regression working -- clearly
 563 ;;; simple linear is working above!
 564 (defvar m nil "holding variable.")
 565 (def m (regression-model (list iron aluminum) absorbtion :print nil))
 566 (send m :compute)
 567 (send m :sweep-matrix)
 568 (format t "~%~A~%" (send m :sweep-matrix))
 569
 570  ;; ERROR... FIX-ME!!
 571 (send m :basis) ;; this should be positive?
 572 (send m :coef-estimates)
 573
 574 (send m :display)
 575 (def m (regression-model (bind-columns iron aluminum) absorbtion))
 576 (send m :help)
 577 (send m :help :display)
 578 (send m :help :basis)
 579 ;; No graphics!  But handle the error gracefully...
 580 (send m :plot-residuals)
 581
 582
 583 (typep aluminum 'sequence)
 584 (typep iron 'sequence)
 585 (matrixp iron)
 586
 587 *variables*
 588
 589 (variables)
 590 (undef 'iron)
 591 (variables)
 592
 593 ;;; Plotting!
 594
 595 (asdf:oos 'asdf:compile-op 'cl-cairo2 :force t)
 596 (asdf:oos 'asdf:load-op 'cl-cairo2)
 597
 598 ;; The above can be used to generate PDF, PS, PNG, and X11/Microsoft
 599 ;; displays (the latter being a proof of concept, of limited use for
 600 ;; "real work").
 601
 602 ;; and this below, as well.
 603 (asdf:oos 'asdf:load-op 'cl-plplot)
 604
 605 ;;; Using R!
 606
 607 (asdf:oos 'asdf:compile-op 'rclg :force t)
 608 (asdf:oos 'asdf:load-op 'rclg)
 609
 610
 611 (in-package :rclg-user)
 612
 613 ;; rclg-init::*r-started*
 614
 615 ;;;#3 Start R within Lisp
 616
 617 (start-rclg)
 618 ;; rclg-init::*r-started*
 619 (rclg-init::check-stack)
 620 (r "Cstack_info")
 621 (defparameter *x* (r seq 1 10))
 622 (defparameter *y* (r rnorm 10))
 623 *y*
 624 (r plot *x* *y*)
 625 *y*
 626
 627 (defparameter *r-version* (r "version"))
 628
 629 ;; This is for illustrative purposes only.  It is not a "good" use of rnbi.
 630 ;; Really, you'll want rnbi to hold anonymous intermediate results, like:
 631 (r plot *x* (rnbi rnorm 10))
 632
 633 (r "Sys.getenv" "LD_LIBRARY_PATH")
 634 (r "Sys.getenv" "LD_PRELOAD")
 635
 636 (r "ls")
 637 (r ls)
 638 (r "search")
 639
 640 (r "geterrmessage")
 641
 642 (r "library" "stats")
 643 (r library "MASS")
 644 (r "library" "Biobase")
 645
 646 (setf my.lib "Biobase")
 647 my.lib
 648 (r library my.lib)
 649
 650 (r "ls")
 651
 652 (r "print.default" 3)
 653 (r "rnorm" 10)
 654
 655 ;; Working in the R space
 656
 657 (r assign "x" 5)
 658 (r assign "x2" (list 1 2 3 5))
 659
 660 (r assign "x2" #(1 2 3 5 3 4 5))
 661 (r assign "z" "y") ;; unlike the above, this assigns character data
 662 (r "ls")
 663 (r ls)
 664
 665 (setf my.r.x2 (r get "x2"))  ;; moving data from R to CL
 666 (r assign "x2" my.r.x2)  ;; moving data from CL to R
 667
 668 ;; The following is not the smartest thing to do!
 669 ;;(r q)
 670
 671
 672
 673 ;;; How might we do statistics with Common Lisp?
 674 ;;; How might we work with a data.frame?
 675 ;;; What could the structures be?
 676 ;;; How much hinting, and of what type, should drive the data
 677 ;;; analysis?
 678
 679 (defpackage :my-data-analysis-example
 680   (:documentation "Example work-package for a data analysis")
 681   (:use :common-lisp :lisp-stat)
 682   (:export results figures report))
 683
 684 (in-package :my-data-analysis-example)
 685
 686 (defvar my-dataset1 (read-file "data/test1.lisp"))
 687 ;; or
 688 (defvar my-dataset2 (read-file "data/test1.csv" :type 'csv))
 689
 690 ;;; manipulate
 691
 692 (setf my-dataset2 (set-description my-dataset2
          ;; Design sketch: (list of symbols) is a placeholder for the
          ;; actual variable names, here and in the next form.  The dataset
          ;; variable is MY-DATASET2, as bound by the DEFVAR above.
 693                                    :dependent-variables (list of symbols)))
 694 (setf my-dataset2 (set-description my-dataset2
 695                                    :independent-variables (list of symbols)))
 696
 697 ;; the following could be true in many cases.
 698 (assert
 699  (list-intersection (get-description my-dataset2 :independent-variables)
 700                     (get-description my-dataset2 :dependent-variables)))
 701 ;;
 702 ;; but we could phrase it better, i.e.
 703 ;;
 704 (get-description
 705  my-dataset2
      ;; The predicate is passed quoted, as data: an unquoted
      ;; (and 'independent-variables 'dependent-variables) would be evaluated
      ;; by AND and collapse to just 'dependent-variables.
 706  :predicate-list-on-variable-metadata (list '(and independent-variables
 707                                                   dependent-variables)))
 708
 709
 710 ;; statistical relations re: input/output, as done above, is one
 711 ;; issue, another one is getting the right approach for statistical
 712 ;; typing, i.e.
 713 (get-description
 714  my-dataset2
 715  :predicate-list-on-variable-metadata (list 'ordinal-variables))
 716
 717
 718 ;; so we could use a set of logical ops to select from variable
 719 ;; metadata, i.e.
 720 ;;    and, or, not
 721 ;; do we really need the simplifying extensions?
 722
 723
 724 ;;; output to REPL
 725
 726 (report my-dataset1 :style 'five-num)
 727 (report my-dataset1 :style 'univariate)
 728 (report my-dataset1 :style 'bivariate)
 729 (report my-dataset1 :style 'metadata)
 730
 731 ;;; to file?
 732
 733 (report my-dataset1
 734         :style 'five-num
 735         :format 'pdf
 736         :stream (filename-as-stream "my-dataset1-5num.pdf"))
 737 (report my-dataset1 :style 'univariate)
 738 (report my-dataset1 :style 'bivariate)
 739 (report my-dataset1 :style 'metadata)
 740
 741 ;;; so report could handle datasets... and models?
 742
 743 (report my-model :style 'formula)
 744 (report my-model :style 'simulate
 745         (list :parameters (:eta 5 :mu 4 :sigma (list 2 1 0.5))
 746               :number-of-reps 10))
 747 ;; should return a list of parameters along with range information,
 748 ;; useful for auto-building the above.   Note that there are 3 types
 749 ;; of parameters that can be considered -- we can have values which
 750 ;; define data, we can have values which define fixed values and some
 751 ;; could be things that we estimate.
 752
 753
 754 (defgeneric report (object &optional style format stream)
       ;; Generic entry point for reporting: OBJECT is required; STYLE, FORMAT
       ;; and STREAM are optional.
       ;; NOTE(review): the methods below are a design sketch.  They list
       ;; STYLE, FORMAT and STREAM as specialized required parameters, which
       ;; is not congruent with this &optional lambda list (standard CL can
       ;; only specialize required parameters), and
       ;; ((stream *repl*) output-stream-type) is not a valid specialized
       ;; parameter.  The specializer classes are not defined in the visible
       ;; file -- confirm the intended design before evaluating.
 755   (:documentation "method for reporting on data"))
 756
 757 (defmethod report ((object dataset)
 758                    (style report-dataset-style-type)
 759                    (format output-format-type)
 760                    ((stream *repl*) output-stream-type))
       ;; Placeholder body: only a string, no reporting logic yet.
 761   "dataset reporting")
 762
 763
 764 (defmethod report ((object model)
 765                    (style report-model-style-type)
 766                    (format output-format-type)
 767                    ((stream *repl*) output-stream-type))
       ;; Placeholder body: only a string, no reporting logic yet.
 768   "model reporting")
 769
 770 (defmethod report ((object analysis-instance)
 771                    (style report-analysis-style-type)
 772                    (format output-format-type)
 773                    ((stream *repl*) output-stream-type))
       ;; Placeholder body: only a string, no reporting logic yet.
 774   "model + dataset reporting")
 775
 776
 777 ;; parameters are just things which get filled with values -- repeatedly
 778 ;; with data, with fixed values, or because they need to be estimated.
 779 (parameters my-model)
 780 (parameters my-model :type 'data)
 781 (parameters my-model :type 'fixed)
 782 (parameters my-model :type 'estimate)
 783 (parameters my-model :type '(estimate fixed))
 784 (parameters my-model :list-types) ;; useful for list-based extraction
 785 ;; of particular types
 786
 787 (setf my-model-data-instance
 788       (compute model data :specification (list :spec 'linear-model
 789                                                :depvar y
 790                                                :indepvar (list x1 x2))))
 791 (report my-model-data-instance)
 792
 793
 794 ;;; So how might we use this?  Probably need to consider the
 795 ;;; serialization of any lisp objects generated, perhaps via some form
 796 ;;; of memoization...?
 797 (in-package :cl-user)
 798
 799 (my-data-analysis-example:report :type 'full)
 800 (my-data-analysis-example:report :type 'summary)
 801 (my-data-analysis-example:figures :type 'pdf :file "results-figs.pdf")
 802
 803 (my-data-analysis-example:report)
 804
 805 ;;; more stuff
 806
 807 (send m :display)
 808 (def m (regression-model (bind-columns iron aluminum) absorbtion))
 809 (send m :help)
 810 (send m :help :display)
 811 (send m :help :basis)
 812
 813 (send m :plot-residuals)
 814
 815 (progn
 816   ;; In general Lisp use, there is also a need to add and remove symbols
 817   ;; from the workspace/namespace.  This is a fundamental skill, similar to
 818   ;; stopping, which is critical.
 819
 820   ;; boundp, fboundp
 821   ;; makunbound, fmakunbound
 822   )
 823
 824
 825 (progn
 826   ;;; A study in array vs list access
 827   (defparameter *x* (list 1 2 3))
 828   (defparameter *y* #(1 2 3))
 829   (defparameter *z* (list 1 (list 2 3) (list 4 5 (list 6 7)) ))
 830   (length *x*)
 831   (length *y*)
 832   (length *z*) ; => need a means to make this 7.
 833   (length  (reduce #'cons *z*)) ; => not quite -- missing iterative
 834
 835   (nelts *x*)
 836   (nth 1 *x*)
 837   (aref *y* 1)
 838   (setf (nth 1 *x*) 6)
 839   *x*
 840   (setf (aref *y* 1) 6)
 841   *y*
 842   )
 843
 844 (in-package :ls-user)
 845
 846 (progn
 847   (defparameter *x* (make-vector 5 :initial-contents '((1d0 2d0 3d0 4d0 5d0))))
 848   ;; estimating a mean, simple way.
 849   (/ (loop for i from 0 to (- (nelts *x*) 1)
 850         summing (vref *x* i))
 851      (nelts *x*))
 852
 853   (defun mean (x)
         "Return the arithmetic mean of the elements of the vector-like X."
 854     (check-type x vector-like)
         ;; Sum and count the elements of the argument X itself, not the
         ;; global *X*.  An empty X makes the division below signal an error.
 855     (/ (loop for i from 0 below (nelts x)
 856           summing (vref x i))
 857        (nelts x)))
 858
 859   ;; estimating variance, Moments
 860   (let ((meanx (mean *x*))
 861         (n (nelts *x*)))
 862     (/ (loop for i from 0 to (1-  n)
 863           summing (* (- (vref *x* i) meanx)
 864                      (- (vref *x* i) meanx)))
 865        n))
 866
 867   ;; estimating variance, unbiased (n-1 denominator)
 868   (let ((meanx (mean *x*))
 869         (nm1 (1- (nelts *x*))))
 870     (/ (loop for i from 0 to nm1
 871           summing (* (- (vref *x* i) meanx)
 872                      (- (vref *x* i) meanx) ))
 873        nm1))
 874
 875  )
 876
 877 ;;;;;;;;;;;;;;; Data stuff
 878
 879 (progn ;; Data setup: matrices and vectors built from the raw data lists
 880
 881   ;; A data-frame is cases (rows) by variables (columns), and building
 882   ;; one from lists takes some getting used to.  Two routes lead to
 883   ;; the same single column of numbers:
 884   ;; #1 - write the data as one row and transpose it; the
 885   ;; list-of-lists is then:          ((1 2 3 4 5))     (1 row, 5 columns)
 886   ;; #2 - the natural list-of-lists: ((1)(2)(3)(4)(5)) (5 rows, 1 column)
 887   ;; src/data/listoflist.lisp has the code that processes this
 888   ;; particular data structure.
 889   (defparameter *indep-vars-1-matrix*
 890     (transpose
 891      (make-matrix 1 (length iron)
 892                   :initial-contents
 893                   (list (mapcar (lambda (fe) (coerce fe 'double-float)) iron))))
 894     "creating iron into double float, straightforward")
 895
 896   (documentation '*indep-vars-1-matrix* 'variable) ; the docstring just given
 897   ;; *indep-vars-1-matrix*
 898
 899   ;; route #2, built directly as a column:
 900   (defparameter *indep-vars-1a-matrix*
 901     (make-matrix (length iron) 1
 902                  :initial-contents
 903                  (mapcar (lambda (fe) (list (coerce fe 'double-float)))
 904                          iron)))
 905   ;; *indep-vars-1a-matrix*
 906
 907   ;; element-wise the two matrices compare equal:
 908   (m= *indep-vars-1-matrix* *indep-vars-1a-matrix*) ; => T
 909   ;; but they are not the same object...
 910   (eql *indep-vars-1-matrix* *indep-vars-1a-matrix*) ; => NIL
 911   (eq *indep-vars-1-matrix* *indep-vars-1a-matrix*) ; => NIL
 912
 913   ;; print both to check by eye
 914   (print *indep-vars-1-matrix*)
 915   (print *indep-vars-1a-matrix*)
 916
 917   (documentation 'lisp-matrix:bind2 'function) ; by which we mean:
 918   (documentation 'bind2 'function)
 919   (bind2 *indep-vars-1-matrix* *indep-vars-1a-matrix* :by :column) ; 2 col
 920   (bind2 *indep-vars-1-matrix* *indep-vars-1a-matrix* :by :row) ; 1 long col
 921
 922   ;; the weird way: two rows (iron, aluminum), then transpose
 923   (defparameter *indep-vars-2-matrix*
 924     (transpose
 925      (make-matrix 2 (length iron)
 926                   :initial-contents
 927                   (list (mapcar (lambda (fe) (coerce fe 'double-float))
 928                                 iron)
 929                         (mapcar (lambda (al) (coerce al 'double-float))
 930                                 aluminum)))))
 931   ;; *indep-vars-2-matrix*
 932
 933   ;; the "right"? way: one (iron aluminum) pair per row; rebinds the same name
 934   (defparameter *indep-vars-2-matrix*
 935     (make-matrix (length iron) 2
 936                  :initial-contents
 937                  (mapcar (lambda (fe al)
 938                            (list (coerce fe 'double-float)
 939                                  (coerce al 'double-float)))
 940                          iron aluminum)))
 941   ;; *indep-vars-2-matrix*
 942
 943
 944   ;; The below FAILS due to coercion issues; it just isn't lispy, it's R'y.
 945 #|
 946   (defparameter *dep-var* (make-vector (length absorbtion)
 947                                        :initial-contents (list absorbtion)))
 948 |#
 949   ;; Coercing every element to double-float first should give the right type.
 950   (defparameter *dep-var*
 951     (make-vector (length absorbtion)
 952                  :type :row
 953                  :initial-contents
 954                  (list
 955                   (mapcar (lambda (ab) (coerce ab 'double-float))
 956                           absorbtion))))
 957   ;; *dep-var*
 958
 959   ;; same data, uncoerced, with an explicit integer element type
 960   (defparameter *dep-var-int*
 961     (make-vector (length absorbtion)
 962                  :type :row
 963                  :element-type 'integer
 964                  :initial-contents (list absorbtion)))
 965
 966   (typep *dep-var* 'matrix-like)        ; => T
 967   (typep *dep-var* 'vector-like)        ; => T
 968
 969   (typep *indep-vars-1-matrix* 'matrix-like) ; => T
 970   (typep *indep-vars-1-matrix* 'vector-like) ; => T
 971   (typep *indep-vars-2-matrix* 'matrix-like) ; => T
 972   (typep *indep-vars-2-matrix* 'vector-like) ; => NIL
 973
 974   iron
 975   ;; The following fails: the coercion has to be applied to each
 976   ;; element of the list, not to the list as a whole:
 977   ;;
 978   ;;     (coerce iron 'real)
 979   ;;
 980   ;; Converting element by element is the general approach to coercing
 981   ;; a list -- is there a more efficient way?
 982   ;;     (coerce 1 'real)
 983   ;;     (mapcar #'(lambda (x) (coerce x 'double-float)) iron)
 984
 985   (princ "Data Set up"))
 986
 987
 988
 989
 990 (progn ;; Data setup, second pass: definitions only, plus small 8x2 examples
 991
 992   (describe 'make-matrix)                ; print a description of make-matrix
 993
 994   (defparameter *indep-vars-2-matrix*
 995     (make-matrix (length iron) 2
 996                  :initial-contents
 997                  (mapcar (lambda (fe al)
 998                            (list (coerce fe 'double-float)
 999                                  (coerce al 'double-float)))
1000                          iron aluminum)))
1001
1002   ;; absorbtion, coerced element by element, as a row vector
1003   (defparameter *dep-var*
1004     (make-vector (length absorbtion)
1005                  :type :row
1006                  :initial-contents
1007                  (list
1008                   (mapcar (lambda (ab) (coerce ab 'double-float))
1009                           absorbtion))))
1010
1011   (make-dataframe *dep-var*)             ; dataframe from the row vector...
1012   (make-dataframe (transpose *dep-var*)) ; ...and from its transpose
1013
1014   (defparameter *dep-var-int*
1015     (make-vector (length absorbtion)
1016                  :type :row
1017                  :element-type 'integer
1018                  :initial-contents (list absorbtion)))
1019
1020   ;; 8x2 matrix given whole, as a literal 2D array: a column of ones
1021   (defparameter *xv+1a*                  ; beside 1 3 2 4 3 5 4 6
1022     (make-matrix
1023      8 2
1024      :initial-contents #2A((1d0 1d0)
1025                            (1d0 3d0)
1026                            (1d0 2d0)
1027                            (1d0 4d0)
1028                            (1d0 3d0)
1029                            (1d0 5d0)
1030                            (1d0 4d0)
1031                            (1d0 6d0))))
1032   ;; the same matrix assembled from two 8x1 columns with bind2
1033   (defparameter *xv+1b*
1034     (bind2
1035      (ones 8 1)
1036      (make-matrix
1037       8 1
1038       :initial-contents '((1d0)
1039                           (3d0)
1040                           (2d0)
1041                           (4d0)
1042                           (3d0)
1043                           (5d0)
1044                           (4d0)
1045                           (6d0)))
1046      :by :column))
1047
1048   (m= *xv+1a* *xv+1b*) ; => T
1049
1050   (princ "Data Set up"))
1051
1052
1053
1054 ;;;; LM
1055
1056 (progn ;; LM: least squares via the normal equations + gelsy, and via lm
1057
1058   (defparameter *y*
1059     (make-vector
1060      8
1061      :type :row
1062      :initial-contents '((1d0 2d0 3d0 4d0 5d0 6d0 7d0 8d0))))
1063
1064   ;; 8x2 design matrix: a column of ones (intercept) and the x values
1065   (defparameter *xv+1*
1066     (make-matrix
1067      8 2
1068      :initial-contents '((1d0 1d0)
1069                          (1d0 3d0)
1070                          (1d0 2d0)
1071                          (1d0 4d0)
1072                          (1d0 3d0)
1073                          (1d0 5d0)
1074                          (1d0 4d0)
1075                          (1d0 6d0))))
1076
1077
1078   ;; so something like (NOTE: matrices are transposed to begin with, hence the incongruity)
1079   (defparameter *xtx-2* (m* (transpose *xv+1*) *xv+1*))
1080   ;; #<LA-SIMPLE-MATRIX-DOUBLE  2 x 2
1081   ;;  8.0d0 28.0d0
1082   ;;  28.0d0 116.0d0>
1083
1084   (defparameter *xty-2* (m* (transpose *xv+1*)  (transpose *y*)))
1085   ;; #<LA-SIMPLE-VECTOR-DOUBLE (2 x 1)
1086   ;;  36.0d0
1087   ;;  150.0d0>
1088
1089   (defparameter *rcond-2* 0.000001)
1090   (defparameter *betahat-2*  (gelsy *xtx-2* *xty-2* *rcond-2*))
1091   ;; *xtx-2* => "details of complete orthogonal factorization"
1092   ;; according to man page:
1093   ;; #<LA-SIMPLE-MATRIX-DOUBLE  2 x 2
1094   ;;  -119.33147112141039d0 -29.095426104883202d0
1095   ;;  0.7873402682880205d0 -1.20672274167718d0>
1096
1097   ;; *xty-2* => output becomes solution:
1098   ;; #<LA-SIMPLE-VECTOR-DOUBLE (2 x 1)
1099   ;;  -0.16666666666668312d0
1100   ;;  1.333333333333337d0>
1101
1102   *betahat-2* ; which matches R, see below
1103
1104   (documentation 'gelsy 'function)
1105
1106
1107 ;;   (#<LA-SIMPLE-VECTOR-DOUBLE (2 x 1)
1108 ;;    -0.16666666666668312 1.333333333333337>
1109 ;;    2)
1110
1111 ;;   ## Test case in R:
1112 ;;   x <- c( 1.0, 3.0, 2.0, 4.0, 3.0, 5.0, 4.0, 6.0)
1113 ;;   y <- c( 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0)
1114 ;;   lm(y~x)
1115 ;;   ## => Call:  lm(formula = y ~ x)
1116
1117 ;;   Coefficients:  (Intercept)            x
1118 ;;                      -0.1667       1.3333
1119
1120 ;;   summary(lm(y~x))
1121 ;;   ## =>
1122
1123 ;;   Call:
1124 ;;   lm(formula = y ~ x)
1125
1126 ;;   Residuals:
1127 ;;          Min         1Q     Median         3Q        Max
1128 ;;   -1.833e+00 -6.667e-01 -3.886e-16  6.667e-01  1.833e+00
1129
1130 ;;   Coefficients:
1131 ;;               Estimate Std. Error t value Pr(>|t|)
1132 ;;   (Intercept)  -0.1667     1.1587  -0.144  0.89034
1133 ;;   x             1.3333     0.3043   4.382  0.00466 **
1134 ;;   ---
1135 ;;   Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
1136
1137 ;;   Residual standard error: 1.291 on 6 degrees of freedom
1138 ;;   Multiple R-squared: 0.7619,        Adjusted R-squared: 0.7222
1139 ;;   F-statistic:  19.2 on 1 and 6 DF,  p-value: 0.004659
1140
1141
1142
1143   ;; which suggests one might do (modulo ensuring correct
1144   ;; orientations).  When this is finalized, it should migrate to
1145   ;; CLS.
1146   ;;
1147
1148
1149   (defparameter *n* 20) ; # rows = # obsns
1150   (defparameter *p* 10) ; # cols = # vars
1151   (defparameter *x-temp*  (rand *n* *p*))
1152   (defparameter *b-temp*  (rand *p* 1))
1153   (defparameter *y-temp*  (m* *x-temp* *b-temp*))
1154   ;; so Y = Xb exactly here (no \eps noise term is added)
1155   (defparameter *rcond* (* (coerce (expt 2 -52) 'double-float)
1156                    (max (nrows *x-temp*) (ncols *x-temp*))))
1157   (defparameter *orig-x* (copy *x-temp*))
1158   (defparameter *orig-b* (copy *b-temp*))
1159   (defparameter *orig-y* (copy *y-temp*))
1160
1161   (defparameter *lm-result* (lm *x-temp* *y-temp*))
1162   (princ (first *lm-result*))
1163   (princ (second *lm-result*))
1164   (princ (third *lm-result*))
1165   (v= (third *lm-result*)
1166       (v- (first (first *lm-result*))
1167           (first  (second *lm-result*))))
1168
1169
1170
1171
1172   ;; Some issues exist in the LAPACK vs. LINPACK variants, hence R
1173   ;; uses LINPACK primarily, rather than LAPACK.  See comments in R
1174   ;; source for issues.
1175
1176
1177   ;; Goal is to start from X, Y and then realize that if
1178   ;; Y = X \beta + \eps,   i.e. 8x1 = 8xp px1  + 8x1,   then
1179   ;;      XtX \hat\beta = Xt Y
1180   ;; so that we can solve the equation  W \beta = Z   where W and Z
1181   ;; are known, to estimate \beta.
1182
1183   ;; the above is known to be numerically unstable -- some processing
1184   ;; of X is preferred and should be done prior.  And most of the
1185   ;; transformation-based work does precisely that.
1186
1187   ;; recall:  Var[Y] = E[(Y - E[Y])(Y-E[Y])t]
1188   ;;   = E[Y Yt] - 2 \mu \mut + \mu \mut
1189   ;;   = E[Y Yt] - \mu \mut
1190
1191   ;; Var Y = E[Y^2] - \mu^2
1192
1193
1194   ;; For initial estimates of covariance of \hat\beta:
1195
1196   ;; \hat\beta = (Xt X)^-1 Xt Y
1197   ;; with E[ \hat\beta ]
1198   ;;        = E[ (Xt X)^-1 Xt Y ]
1199   ;;        = E[(Xt X)^-1 Xt (X\beta)]
1200   ;;        = \beta
1201   ;;
1202   ;; So Var[\hat\beta] = (Xt X)^-1 Xt Var[Y] X (Xt X)^-1
1203   ;;                   = \sigma^2 (Xt X)^-1   when Var[Y] = \sigma^2 I
1204   ;; and this gives SE(\beta_i) = (* (sqrt (mref Var i i)) adjustment)
1205
1206
1207   ;; from docs:
1208
1209   (defparameter *temp-result*
1210     (let ((*default-implementation* :foreign-array))
1211       (let* ((m 10)
1212              (n 10)
1213              (a (rand m n))
1214              (x (rand n 1))
1215              (b (m* a x))
1216              (rcond (* (coerce (expt 2 -52) 'double-float)
1217                        (max (nrows a) (ncols a)))))
1218         ;; gelsy is called exactly once here.  The notes on *xtx-2* and
1219         ;; *xty-2* above report that it can overwrite its arguments, so
1220         ;; solving twice with the same A and B may not solve the same
1221         ;; system twice.  The value returned is the residual x - xhat.
1222         ;; no applicable conversion?
1223         ;; (m-   (#<FA-SIMPLE-VECTOR-DOUBLE (10 x 1))
1224         ;;       (#<FA-SIMPLE-VECTOR-DOUBLE (10 x 1)) )
1225         (v- x (first (gelsy a b rcond))))))
1226
1227
1228   (princ *temp-result*)
1229
1230   (setf *temp-result*
1231         (let ((*default-implementation* :lisp-array))
1232           (let* ((m 10)
1233                  (n 10)
1234                  (a (rand m n))
1235                  (x (rand n 1))
1236                  (b (m* a x))
1237                  (rcond (* (coerce (expt 2 -52) 'double-float)
1238                            (max (nrows a) (ncols a)))))
1239             ;; Same experiment with the :lisp-array implementation:
1240             ;; again a single gelsy call, for the reason given above,
1241             ;; with m- rather than v- forming the residual x - xhat.
1242             ;;
1243             (m- x (first  (gelsy a b rcond)))
1244             )))
1245   (princ *temp-result*)
1246
1247
1248   (defparameter *xv*
1249     (make-vector
1250      8
1251      :type :row ;; default, not usually needed!
1252      :initial-contents '((1d0 3d0 2d0 4d0 3d0 5d0 4d0 6d0))))
1253
1254   (defparameter *y*
1255     (make-vector
1256      8
1257      :type :row
1258      :initial-contents '((1d0 2d0 3d0 4d0 5d0 6d0 7d0 8d0))))
1259
1260   ;; so something like (NOTE: matrices are transposed to begin with, hence the incongruity)
1261   (defparameter *xtx-1* (m* *xv* (transpose *xv*)))
1262   (defparameter *xty-1* (m* *xv* (transpose  *y*)))
1263   (defparameter *rcond-in* (* (coerce (expt 2 -52) 'double-float)
1264                               (max (nrows *xtx-1*)
1265                                    (ncols *xtx-1*))))
1266
1267   (defparameter *betahat*  (gelsy *xtx-1* *xty-1* *rcond-in*))
1268
1269   ;;  (#<LA-SIMPLE-VECTOR-DOUBLE (1 x 1)
1270   ;;  1.293103448275862>
1271   ;;  1)
1272
1273   ;;   ## Test case in R:
1274   ;;   x <- c( 1.0, 3.0, 2.0, 4.0, 3.0, 5.0, 4.0, 6.0)
1275   ;;   y <- c( 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0)
1276   ;;   lm(y~x-1)
1277   ;;   ## =>
1278   ;;   Call:
1279   ;;   lm(formula = y ~ x - 1)
1280
1281   ;;   Coefficients:
1282   ;;       x
1283   ;;   1.293
1284
1285   (first  *betahat*))
1286
1287
1288
1289 #|
1290   (type-of #2A((1 2 3 4 5)
1291                (10 20 30 40 50)))
1292
1293   (type-of (rand 10 20))
1294
1295   (typep #2A((1 2 3 4 5)
1296              (10 20 30 40 50))
1297          'matrix-like)
1298
1299   (typep (rand 10 20) 'matrix-like)
1300
1301   (typep #2A((1 2 3 4 5)
1302              (10 20 30 40 50))
1303          'array)
1304
1305   (typep (rand 10 20) 'array)
1306 |#