added run code for sdl-demos, but doesn't work on our current version of SBCL
[CommonLispStat.git] / data-clos.lisp
blob327140b1a8a42564913a31c7ca162fbedb86bf25
1 ;;; -*- mode: lisp -*-
3 ;;; File: data-clos.lisp
4 ;;; Author: AJ Rossini <blindglobe@gmail.com>
5 ;;; Copyright: (c)2008, AJ Rossini. BSD, LLGPL, or GPLv2, depending
6 ;;; on how it arrives.
7 ;;; Purpose: data package for lispstat
8 ;;; Time-stamp: <2008-03-12 17:18:42 user>
9 ;;; Creation: <2008-03-12 17:18:42 user>
11 ;;; What is this talk of 'release'? Klingons do not make software
12 ;;; 'releases'. Our software 'escapes', leaving a bloody trail of
13 ;;; designers and quality assurance people in its wake.
15 ;;; This organization and structure is new to the 21st Century
16 ;;; version.
18 ;;; data-clos.lisp
19 ;;;
20 ;;; redoing data structures in a CLOS based framework.
21 ;;;
22 ;;; No real basis for work, there is a bit of new-ness and R-ness to
23 ;;; this work. In particular, the notion of relation is key and
;;; integral to the analysis. Tables are related and matched
;;; vectors, for example. "column" vectors are related observations
26 ;;; (by measure/recording) while "row" vectors are related readings
27 ;;; (by case)
28 ;;;
30 (in-package :cl-user)
;;; Package holding the CLOS-based dataset layer.
;;;
;;; Export names are written as uninterned symbols (#:name) so that
;;; reading this form does not intern them in the current package.
;;; The class, its slot accessors and the consistency predicate are
;;; exported under the names they are actually defined with below;
;;; CONSISTENT-DATASET-P is retained so existing importers keep
;;; resolving the symbol even though no function of that name is
;;; defined in this file.
(defpackage :lisp-stat-data-clos
  (:use :common-lisp
        :clem)
  (:export #:statistical-dataset ;; primary class
           #:dataset             ;; primary tool/structure (storage accessor)
           #:doc-string          ;; slot accessors
           #:case-labels
           #:var-labels

           #:modifyData          ;; metadata mods
           #:importData          ;; get it in
           #:reshapeData         ;; data mods

           #:consistent-statistical-dataset-p
           #:consistent-dataset-p ;; legacy export name, kept for compatibility
           #:varNames #:caseNames))
44 (in-package :lisp-stat-data-clos)
46 ;; Need to figure out typed vectors. We then map a series of typed
47 ;; vectors over to tables where columns are equal typed. In a sense,
48 ;; this is a relation (1-1) of equal-typed arrays. For the most part,
49 ;; this ends up making the R data.frame into a relational building
50 ;; block (considering 1-1 mappings using row ID as a relation).
51 ;; Is this a worthwhile generalization?
;;; Container pairing a block of stored data with its descriptive
;;; metadata (free-form documentation plus case and variable labels).
;;; Every slot defaults to NIL when its initarg is omitted.
(defclass statistical-dataset ()
  ((store
    :accessor dataset
    :initarg :storage
    :initform nil
    :documentation "Data storage slot. Should be an array or a relation.")
   (documentation-string
    :accessor doc-string
    :initarg :doc
    :initform nil
    :documentation "Information about statistical-dataset.")
   (case-labels
    :accessor case-labels
    :initarg :case-labels
    :initform nil
    :documentation "Labels used for describing cases (doc metadata),
possibly used for merging.")
   (var-labels
    :accessor var-labels
    :initarg :var-labels
    :initform nil
    :documentation "Variable names."))
  (:documentation "Standard Cases by Variables Statistical-Dataset."))
75 ;; statistical-dataset is the basic cases by variables framework. Need to embed
76 ;; this within other structures which allow for generalized relations.
77 ;; Goal is to ensure that relations imply and drive the potential for
78 ;; statistical relativeness such as correlation, interference, and
79 ;; similar concepts.
81 ;; Actions on a statistical data structure.
(defgeneric consistent-statistical-dataset-p (ds)
  (:documentation "methods to check for consistency."))

(defmethod consistent-statistical-dataset-p ((ds statistical-dataset))
  "Test that statistical-dataset is internally consistent with metadata.
Ensure that dims of stored data are same as case and var labels.

When the store is an array, its dimensions must be EQUAL to the list
(number-of-var-labels number-of-case-labels).  When the store is not
an array, ARRAY-DIMENSIONS cannot be applied; in that case the dataset
is reported consistent only if it is completely empty (no store and no
labels), and inconsistent otherwise, instead of signalling a type
error."
  (let ((store (dataset ds))
        (expected-dims (list (length (var-labels ds))
                             (length (case-labels ds)))))
    (if (arrayp store)
        (equal (array-dimensions store) expected-dims)
        ;; Non-array storage: only the freshly created, empty dataset
        ;; can be judged consistent without knowing the storage layout.
        (and (null store)
             (equal expected-dims '(0 0))))))
(defun print-structure-table (ds)
  "example of what we want the methods to look like.  Should be sort
of like a spreadsheet if the storage is a table.

Prints the variable labels of DS via PRINT-AS-ROW, then one line per
case label: the label followed by the result of ROW-EXTRACT on the
stored data at a running 0-based index.  Output goes to
*STANDARD-OUTPUT*.

NOTE(review): PRINT-AS-ROW and ROW-EXTRACT are not defined in this
file; they must be supplied elsewhere before this function can run."
  (print-as-row (var-labels ds))
  (let ((j -1))
    (dolist (i (case-labels ds))
      ;; PRINC takes an object and an optional stream, not a control
      ;; string with arguments, so FORMAT is used; INCF replaces the
      ;; non-existent INCR.
      (format t "~A ~A~%" i (row-extract (dataset ds) (incf j))))))
(defun print-structure-relational (ds)
  "example of what we want the methods to look like.  Should be sort
of like a graph of spreadsheets if the storage is a relational
structure.

For every element returned by (RELATIONS DS), prints the variable
labels via PRINT-AS-ROW and then one line per case label with the
result of ROW-EXTRACT at a running 0-based index.  Output goes to
*STANDARD-OUTPUT*.

NOTE(review): RELATIONS, PRINT-AS-ROW and ROW-EXTRACT are not defined
in this file, and the loop variable is not yet used to select which
relation is printed -- every pass prints the same table."
  (dolist (k (relations ds))
    (declare (ignorable k))
    (print-as-row (var-labels ds))
    (let ((j -1))
      (dolist (i (case-labels ds))
        ;; FORMAT instead of PRINC with a control string; INCF instead
        ;; of the undefined INCR.
        (format t "~A ~A~%" i (row-extract (dataset ds) (incf j)))))))
(defgeneric reshapeData (dataform into-form as-copy)
  (:documentation "pulling data into a new form"))

;;; Method for STATISTICAL-DATASET: WHAT names the slot of DS whose
;;; contents are handed to RESHAPE together with INTO-FORM.
;;;
;;; The original body called CL:GET on DS.  GET reads a symbol's
;;; property list and signals a type error for any non-symbol, so it
;;; can never succeed on a CLOS instance; SLOT-VALUE is the standard
;;; way to read a named part of an instance.
;;;
;;; NOTE(review): RESHAPE is not defined in this file.  Also note the
;;; generic function names its 2nd/3rd parameters INTO-FORM/AS-COPY
;;; while this method uses them as WHAT/INTO-FORM -- confirm intent.
(defmethod reshapeData ((ds statistical-dataset) what into-form)
  (reshape (slot-value ds what) into-form))
(defmethod reshapeData ((ds array) (sp list) copy-p)
  "Array via specList specialization: similar to the common R
approaches to redistribution.

SP is a property list; the :toWide and :primaryKey entries are read
from it.  No reshaping is implemented yet: the method currently
returns NIL.  The form is now properly closed (the original LET and
DEFMETHOD were left unterminated, which made the reader swallow the
definitions that follow)."
  (declare (ignorable copy-p))
  (let ((widep (getf sp :toWide))
        (primaryKey (getf sp :primaryKey)))
    ;; Bound for the future implementation; silence unused warnings.
    (declare (ignorable widep primaryKey))
    nil))
;;; Empty marker class.  DEFCLASS requires both a superclass list and
;;; a slot-specifier list; the original form omitted the latter and
;;; was therefore malformed.
(defclass data-format ()
  ()
  (:documentation "Placeholder class with no superclasses and no slots."))
(defun transpose (x)
  "map NxM to MxN.

X must be a two-dimensional array.  Returns a fresh array with the
same element type whose element (j, i) is element (i, j) of X; X
itself is not modified.  Signals a TYPE-ERROR (via CHECK-TYPE) when X
is not a rank-2 array."
  ;; A string that is the only body form is the function's return
  ;; value, not its docstring, so the original returned the string and
  ;; transposed nothing.
  (check-type x (array * (* *)))
  (destructuring-bind (n m) (array-dimensions x)
    (let ((result (make-array (list m n)
                              :element-type (array-element-type x))))
      (dotimes (i n result)
        (dotimes (j m)
          (setf (aref result j i) (aref x i j)))))))
;;; Unimplemented stub.  Presumably meant to reorder X according to
;;; the ranks in ORDER, along rows or columns depending on BY-ROW --
;;; TODO confirm the intended semantics before implementing.
;;; NOTE(review): the string below is the only body form, so it is the
;;; function's return value rather than a docstring; none of the
;;; parameters are used.
(defun reorder-by-rank (x order &key (by-row t))
  " .")
;;; Unimplemented stub.  Presumably meant to reorder X by the
;;; permutation PERM, along rows or columns depending on BY-ROW --
;;; TODO confirm the intended semantics before implementing.
;;; NOTE(review): the string below is the only body form, so it is the
;;; function's return value rather than a docstring; none of the
;;; parameters are used.
(defun reorder-by-permutation (x perm &key (by-row t))
  " .")
140 ;;; Need to consider modification APIs
141 ;;; actions are:
142 ;;; - import
143 ;;; - get/set row names (case names)
144 ;;; - column names (variable names)
145 ;;; - dataset values
146 ;;; - annotation/metadata
147 ;;; - make sure that we do coherency checking in the exported
148 ;;; - functions.
149 ;;; - ...
;;; - reshapeData/reformat/reshape: a re-formed version of the dataset (no
;;; additional input).
152 ;;; - either overwriting or not, i.e. with or without copy.
153 ;;; - check consistency of resulting data with metadata and related
154 ;;; data information.
155 ;;; -
157 ;;; Variable-name handling for Tables. Needs error checking.
(defun varNames (ds)
  "Return the variable labels of DS, as read through VAR-LABELS."
  (var-labels ds))
(defun set-varNames (ds vN)
  "Replace the variable labels of DS with VN and return VN.
VN must have the same length as the labels currently stored in DS;
otherwise an error is signalled and DS is left untouched."
  ;; Guard clause first: reject a replacement of a different length.
  (when (/= (length (var-labels ds))
            (length vN))
    (error "wrong size."))
  (setf (var-labels ds) vN))

;; Makes (setf (varNames ds) new-names) expand into SET-VARNAMES.
(defsetf varNames set-varNames)
169 ;;; Case-name handling for Tables. Needs error checking.
(defun caseNames (ds)
  "Return the case labels of DS, as read through CASE-LABELS."
  (case-labels ds))
(defun set-caseNames (ds vN)
  "Replace the case labels of DS with VN and return VN.
VN must have the same length as the labels currently stored in DS;
otherwise an error is signalled and DS is left untouched."
  ;; Guard clause first: reject a replacement of a different length.
  (when (/= (length (case-labels ds))
            (length vN))
    (error "wrong size."))
  (setf (case-labels ds) vN))

;; Makes (setf (caseNames ds) new-names) expand into SET-CASENAMES.
(defsetf caseNames set-caseNames)
;;; Generic entry point for bringing data into CLS.  The documentation
;;; string and the form itself were left unterminated; both are closed
;;; here, and the usage example mirrors the one given for the
;;; pathname method further down in this file.
(defgeneric importData (source featureList)
  (:documentation "command to get data into CLS.  Specific methods
will need to handle files, internal data structures, and DBMS's.  We
would like to be able to do:

  (importData (parse-namestring 'path/to/file')
              (list :format 'csv))"))
(defun pathname-example (name)
  "Parse the namestring NAME and return two values: its name
component in :COMMON case and in :LOCAL case."
  ;; The VALUES form must sit inside the LET body; the original closed
  ;; the LET right after its bindings, leaving MY-PATH unbound below.
  (let ((my-path (parse-namestring name)))
    (values (pathname-name my-path :case :common)
            (pathname-name my-path :case :local))))
(defvar sourceTypes (copy-list '(csv lisp tsv special))
  "Symbols that may be used to name an input source format.
CSV and TSV are the standard delimited formats, LISP refers to forms,
and SPECIAL refers to a FUNCTION which does the parsing as
appropriate.")
;;; File-based import.  The per-format branches are still
;;; placeholders that return NIL; what is fixed here is the dispatch
;;; skeleton around them:
;;;  - the docstring is terminated;
;;;  - LET* binds fmtType before it is used to compute newData (a
;;;    parallel LET evaluated the use before the binding existed);
;;;  - formats are matched by name, so 'csv written in a caller's own
;;;    package (or :csv, or "csv") is recognised -- quoted CASE keys
;;;    such as ('csv ...) only match symbols of this package, plus the
;;;    symbol QUOTE by accident;
;;;  - the fall-through is a real default clause ((:default ...) in a
;;;    CASE only matches the keyword :DEFAULT).
;;; NOTE(review): getDataAsLists is not defined in this file.
(defmethod importData ((fileHandle pathname)
                       (fmt list)) ;sourceTypes))
  "File-based input for data.
Usually used by:
  (importData (parse-namestring 'path/to/file')
              (list :format 'csv))

  (importData myPathName (list :format 'lisp))

FMT is a property list.  Its :format entry selects the source format
(csv, tsv, lisp or special); for special, the :special-parser entry is
read as well.  Signals an error for any other format."
  (let* ((fmtType (getf fmt :format))
         (newData (getDataAsLists fileHandle fmtType)))
    ;; newData is not consumed until the branches are implemented.
    (declare (ignorable newData))
    (flet ((format-is (name)
             ;; Compare by name so the symbol's home package is irrelevant.
             (and (typep fmtType '(or symbol string))
                  (string-equal fmtType name))))
      (cond ((format-is "csv") nil)
            ((format-is "tsv") nil)
            ((format-is "lisp") nil)
            ((format-is "special")
             (let ((parserFcn (getf fmt :special-parser)))
               (declare (ignorable parserFcn))
               nil))
            (t (error "no standard default importData format"))))))
;;; Placeholder method for importing from an in-memory array with a
;;; list of format options.
;;; NOTE(review): the string is the only body form, so it is returned
;;; as the method's value rather than serving as a docstring; no
;;; import is performed yet.
(defmethod importData ((ds array) (fmt list))
  "mapping arrays into CLS data.")
;;; Placeholder method for importing from a database extract.
;;; NOTE(review): the specializer classes DBMSandSQLextract and
;;; mappingTypes are not defined anywhere in this file; they must
;;; exist before this form is loaded -- confirm where they come from.
;;; NOTE(review): the string is the only body form, so it is returned
;;; as the method's value rather than serving as a docstring.
(defmethod importData ((dsSpec DBMSandSQLextract)
                       (fmt mappingTypes))
  "mapping DBMS into CLS data.")
232 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
233 ;;; EXPERIMENT
234 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
236 (in-package :cl-user)
;;; Scratch package for exercising the dataset package from the
;;; outside: it sees only the symbols that :lisp-stat-data-clos
;;; exports (plus Common Lisp and clem).
(defpackage #:lisp-stat-data-clos-example
  (:use #:common-lisp
        #:clem
        #:lisp-stat-data-clos))
243 (in-package :lisp-stat-data-clos-example)
246 ;;; Use of this package: To see what gets exported for use in others,
247 ;;; and how much corruption can be done to objects within a package.
;;; Interactive experiment script: the forms below are meant to be
;;; evaluated one at a time at the REPL from within the example
;;; package.
;;;
;;; NOTE(review): of the names used here, only DATASET, VARNAMES and
;;; CASENAMES appear in the :export list of :lisp-stat-data-clos as
;;; shown in this file.  STATISTICAL-DATASET,
;;; CONSISTENT-STATISTICAL-DATASET-P, DOC-STRING, CASE-LABELS and
;;; VAR-LABELS are not exported there, so when read in this package
;;; they name different symbols unless the export list is extended.

;; An empty dataset: every slot takes its NIL default.
(defvar my-ds-1 nil
  "test ds for experiment.")
(setf my-ds-1 (make-instance 'statistical-dataset))
my-ds-1

;; A populated dataset: a 2x5 array with two variable labels and five
;; case labels.
(defvar my-ds-2 nil
  "test ds for experiment.")
(setf my-ds-2 (make-instance 'statistical-dataset
                             :storage #2A((1 2 3 4 5) (10 20 30 40 50))
                             :doc "This is an interesting statistical-dataset"
                             :case-labels (list "a" "b" "c" "d" "e")
                             :var-labels (list "x" "y")))

;; Array dimensions (2 5) are compared against
;; (number-of-var-labels number-of-case-labels).
(consistent-statistical-dataset-p my-ds-2)
my-ds-2
(make-array (list 3 5))

;; The storage can be reached both through the slot name and through
;; its accessor.
(slot-value my-ds-2 'store)
(dataset my-ds-2)

(eq (dataset my-ds-2) (slot-value my-ds-2 'store))

(doc-string my-ds-2)
(case-labels my-ds-2)
(var-labels my-ds-2)

;; We need to protect operations like the following a bit more, so
;; that the results cannot silently become wrong.  It would be nasty
;; if the statistical-dataset became inconsistent.
;; The raw slot accessor performs no length check:
(setf (var-labels my-ds-2) (list "a" "b"))
(setf (var-labels my-ds-2) (list "a" "b" "c")) ;; Should error, but the raw accessor accepts it.
(consistent-statistical-dataset-p my-ds-2) ;; F
(setf (var-labels my-ds-2) (list "a" "b"))
(consistent-statistical-dataset-p my-ds-2) ;; T

;; This is now done by the checked interface (varNames / caseNames),
;; whose setters compare the new length with the current labels:

(varNames my-ds-2)
(setf (varNames my-ds-2) (list "a" "b"))
(varNames my-ds-2)

;; Same exercise for case names; the original labels are saved first
;; so they can be restored at the end.
(defvar origCaseNames nil)
(setf origCaseNames (caseNames my-ds-2))
(setf (caseNames my-ds-2) (list "a" "b" "c" 4 5))
(caseNames my-ds-2)
;; Four names for five cases: set-caseNames signals "wrong size.".
(setf (caseNames my-ds-2) (list "a" "b" 4 5))
(setf (caseNames my-ds-2) origCaseNames)