64973154b72c4d387e95f417fadf471d650af968
[CommonLispStat.git] / examples / 10-basicEDA.lisp
blob64973154b72c4d387e95f417fadf471d650af968
1 ;;; -*- mode: lisp -*-
3 ;;; Time-stamp: <2012-10-11 16:07:25 tony>
4 ;;; Creation: <2009-04-19 09:41:09 tony>
5 ;;; File: basic-eda.lisp
6 ;;; Author: AJ Rossini <blindglobe@gmail.com>
7 ;;; Copyright: (c)2009--, AJ Rossini. See LICENSE.mit in top level
8 ;;; directory for conditions.
9 ;;; Purpose: Example of basic exploratory data analysis in CLS.
11 ;;; What is this talk of 'release'? Klingons do not make software
12 ;;; 'releases'. Our software 'escapes', leaving a bloody trail of
13 ;;; designers and quality assurance people in its wake.
(in-package :cls-examples)

;; The analysis below works on the data held in *chickwts-df*.  That
;; data is loaded by the "examples/00-loadingData.lisp" example, so
;; run that file first.
(let ((loader-script (localized-pathto "examples/00-loadingData.lisp")))
  (load loader-script :verbose t))

;; Evaluate the variable on its own to inspect the loaded data.
*chickwts-df*
26 ;;;;;;;;;;;;;;; EVERYTHING BELOW IS BROKEN
29 ;; Summarize is the basic EDA tool -- it accepts symbols or lists of
30 ;; symbols, to describe what, when, and how to do it. The resulting
31 ;; data structure has a means for re-invoking the result as well as
32 ;; partial storage of key results (when appropriate) as well as
33 ;; metadata about the results (time / context if provided).
35 ;; numerical: (txt / *ml / , variable / stream / spec'd to file)
36 ;; visual: (static/dynamic, fixed/interactive) (need better term than fixed)
38 ;; context -- using dataset metadata, to drive the resulting summary.
39 ;; dataset metadata:
40 ;; #1 sampling scheme -
41 ;; retro / prospect collection
42 ;; random, biased, convenience sampling;
43 ;; #2 purpose of dataset integration/manipulation
44 ;; #3 sampling/temporal component of variables
;; Create a metadata variable graph which provides an initial analysis
;; structure for recording results.
;;
;; NOTE(review): VAR-LIST and APPROPRIATE-PAIRS-LIST are not defined in
;; this file; they are assumed to return, respectively, a list of the
;; data frame's variables and a list of (v1 . v2) conses naming the
;; variable pairs to connect -- confirm before relying on this form.

(defparameter *chkwt-df-depgraph*
  (let ((g (make-container 'graph-container)))
    ;; One vertex per variable of the data frame.
    (dolist (v (var-list *chickwts-df*))
      (add-vertex g v))
    ;; One edge per pair; LOOP needs an explicit DO before a body form.
    (loop for (v1 . v2) in (appropriate-pairs-list *chickwts-df*)
          do (add-edge-between-vertexes g v1 v2))
    ;; Return the graph itself: without this the LET would yield the
    ;; value of the final loop (NIL) and the parameter would be useless.
    g)
  "Graph with one vertex per variable of *chickwts-df* and one edge per
pair returned by APPROPRIATE-PAIRS-LIST.")
(defparameter *my-df-smry-num*
  (summarize *chickwts-df*
             :type 'numerical
             :io 'listing)
  "Numerical summary of *chickwts-df*, requested with :io 'listing.")
;; Alternative form of the numerical summary: evaluating this rebinds
;; *my-df-smry-num*.  It summarizes *chickwts-df*, the data frame this
;; example file analyzes; the former *my-df* is not defined in this file.
(defparameter *my-df-smry-num*
  (summarize *chickwts-df*
             :type 'numerical
             :io 'report-pdf
             :device '(file "output.pdf"))
  "Numerical summary of *chickwts-df*, requested with :io 'report-pdf
and the file \"output.pdf\" as its device.")
;; Visual summary, interactive variant.  It summarizes *chickwts-df*,
;; the data frame this example file analyzes; the former *my-df* is not
;; defined in this file.
(defparameter *my-df-smry-vis*
  (summarize *chickwts-df*
             :type 'visual
             :io 'interactive
             :device 'xwin)
  "Visual summary of *chickwts-df*, requested with :io 'interactive and
:device 'xwin.")
;; Visual summary, interactive-dynamic variant: evaluating this rebinds
;; *my-df-smry-vis*.  It summarizes *chickwts-df*, the data frame this
;; example file analyzes; the former *my-df* is not defined in this file.
(defparameter *my-df-smry-vis*
  (summarize *chickwts-df*
             :type 'visual
             :io 'interactive-dynamic)
  "Visual summary of *chickwts-df*, requested with :io 'interactive-dynamic.")
;; Visual summary, static variant: evaluating this rebinds
;; *my-df-smry-vis*.  It summarizes *chickwts-df*, the data frame this
;; example file analyzes; the former *my-df* is not defined in this file.
(defparameter *my-df-smry-vis*
  (summarize *chickwts-df*
             :type 'visual
             :io 'static)
  "Visual summary of *chickwts-df*, requested with :io 'static.")