3 ;;; File: data-clos.lisp
4 ;;; Author: AJ Rossini <blindglobe@gmail.com>
5 ;;; Copyright: (c)2008, AJ Rossini. BSD, LLGPL, or GPLv2, depending
7 ;;; Purpose: data package for lispstat
8 ;;; Time-stamp: <2008-03-12 17:18:42 user>
9 ;;; Creation: <2008-03-12 17:18:42 user>
11 ;;; What is this talk of 'release'? Klingons do not make software
12 ;;; 'releases'. Our software 'escapes', leaving a bloody trail of
13 ;;; designers and quality assurance people in its wake.
15 ;;; This organization and structure is new to the 21st Century
20 ;;; redoing data structures in a CLOS based framework.
22 ;;; This work has no real prior basis; there is a bit of new-ness and
23 ;;; R-ness to it. In particular, the notion of relation is key and
24 ;;; integral to the analysis. Tables are related and matched
25 ;;; vectors, for example. "column" vectors are related observations
26 ;;; (by measure/recording) while "row" vectors are related readings (by case).
31 (defpackage :lisp-stat-data-clos
34 (:export dataset varNames caseNames
))
;; All forms that follow are read in the LISP-STAT-DATA-CLOS package.
(in-package #:lisp-stat-data-clos)
39 ;; Need to figure out typed vectors. We then map a series of typed
40 ;; vectors over to tables where columns are equal typed. In a sense,
41 ;; this is a relation (1-1) of equal-typed arrays. For the most part,
42 ;; this ends up making the R data.frame into a relational building
43 ;; block (considering 1-1 mappings using row ID as a relation).
;; Placeholder class for relational storage.  It has no superclasses and
;; no slots yet; DEFCLASS still requires the (empty) slot list.
(defclass relation ()
  ())
54 :documentation
"Data storage slot. Should be an array or a
56 (documentation-string :initform nil
59 :documentation
"Information about dataset.")
60 (case-labels :initform nil
63 :documentation
"labels used for describing cases (doc
64 metadata), possibly used for merging.")
65 (var-labels :initform nil
68 :documentation
"Variable names."))
69 (:documentation
"Standard Cases by Variables Dataset."))
71 ;; Need to set up dataset as a table or a relation. One way or
72 ;; another it should all work out. TODO: How do we do multiple
73 ;; inheritance or composite structures?
76 "test ds for experiment.")
;; Bare instance: no initargs are supplied, so every slot keeps its initform.
(setq my-ds-1 (make-instance 'dataset))
(defun consistent-dataset-p (ds)
  "Test that dataset DS is internally consistent with its metadata.

The dimensions of the array returned by (DATASET DS) are compared with
the list (number-of-variable-labels number-of-case-labels).  Returns
true when they are EQUAL and NIL otherwise.  NIL is also returned when
the store is not an array (for example when it was never set), instead
of letting ARRAY-DIMENSIONS signal a type error."
  (let ((store (dataset ds)))
    (and (arrayp store)
         (equal (array-dimensions store)
                (list (length (var-labels ds))
                      (length (case-labels ds)))))))
89 "test ds for experiment.")
;; Example dataset: a 2x5 array with two variable labels and five case labels.
(setq my-ds-2
      (make-instance 'dataset
                     :storage #2A((1 2 3 4 5)
                                  (10 20 30 40 50))
                     :doc "This is an interesting dataset"
                     :case-labels (list "a" "b" "c" "d" "e")
                     :var-labels (list "x" "y")))
;; Scratch checks against MY-DS-2.
(consistent-dataset-p my-ds-2)

(make-array '(3 5))

;; The DATASET accessor and the STORE slot should yield the same object.
(slot-value my-ds-2 'store)
(eq (dataset my-ds-2)
    (slot-value my-ds-2 'store))

(case-labels my-ds-2)

;; Assignments like the following need more protection, so that the
;; results cannot end up wrong.  It would be nasty if the dataset were
;; to become inconsistent.
(setf (var-labels my-ds-2) (list "a" "b"))
(setf (var-labels my-ds-2) (list "a" "b" "c")) ; Should error!
(consistent-dataset-p my-ds-2)                 ; expected: false
(setf (var-labels my-ds-2) (list "a" "b"))
(consistent-dataset-p my-ds-2)                 ; expected: true
(defun print-structure-table (ds)
  "Example of what we want the methods to look like.  Should be sort
of like a spreadsheet if the storage is a table.

Prints the variable labels of DS with PRINT-AS-ROW, then one line per
case label: the label followed by the row that ROW-EXTRACT returns for
the matching zero-based row index of (DATASET DS).

NOTE(review): PRINT-AS-ROW and ROW-EXTRACT are not defined in the
visible part of this file; their contracts are assumed from the call
sites and should be confirmed."
  (print-as-row (var-labels ds))
  ;; FORMAT replaces the printf-style PRINC call, and the explicit
  ;; counter replaces the unbound J / non-standard INCR.
  (loop for case-label in (case-labels ds)
        for row-index from 0
        do (format t "~A ~A~%"
                   case-label
                   (row-extract (dataset ds) row-index))))
(defun print-structure-relational (ds)
  "Example of what we want the methods to look like.  Should be sort
of like a graph of spreadsheets if the storage is relational.

For every element of (RELATIONS DS), prints the variable labels of DS
with PRINT-AS-ROW and then one line per case label: the label followed
by the row that ROW-EXTRACT returns for the matching zero-based row
index of (DATASET DS).

NOTE(review): RELATIONS, PRINT-AS-ROW and ROW-EXTRACT are not defined
in the visible part of this file; confirm their contracts.  The
relation itself is not consulted yet, so every pass prints the same
table."
  (dolist (k (relations ds))
    (declare (ignorable k))
    (print-as-row (var-labels ds))
    ;; FORMAT replaces the printf-style PRINC call, and the explicit
    ;; counter replaces the unbound J / non-standard INCR.
    (loop for case-label in (case-labels ds)
          for row-index from 0
          do (format t "~A ~A~%"
                     case-label
                     (row-extract (dataset ds) row-index)))))
(defgeneric extract (dataform what into-form)
  (:documentation
   "Generic entry point taking a DATAFORM, a selector WHAT and a target
INTO-FORM.  The DATASET method in this file selects WHAT from the
dataset and passes the selection to RESHAPE together with INTO-FORM."))
(defmethod extract ((ds dataset) what into-form)
  "Select WHAT from DS with GET, then pass the selection and INTO-FORM
to RESHAPE and return its result.

NOTE(review): unless this package shadows GET, it resolves to CL:GET,
which expects a symbol rather than a DATASET instance -- confirm the
intended accessor.  RESHAPE is not defined in the visible part of this
file."
  (let ((selection (get ds what)))
    (reshape selection into-form)))
;; Placeholder class for data formats.  It has no superclasses and no
;; slots yet; DEFCLASS still requires the (empty) slot list.
(defclass data-format ()
  ())
151 (defun reorder-by-rank (x order
&key
(by-row t
))
154 (defun reorder-by-permutation (x perm
&key
(by-row t
))
158 ;;; Variable-name handling for Tables. Needs error checking.
(defun set-varNames (ds vN)
  "Replace the variable labels of DS with the list VN.

VN must have exactly as many elements as the current variable labels
of DS; otherwise an error with the message \"wrong size.\" is
signalled and DS is left untouched.  Returns VN on success."
  ;; Compare the label counts before and after, so that the labels
  ;; cannot be resized independently of the stored data.
  (if (= (length (var-labels ds))
         (length vN))
      (setf (var-labels ds) vN)
      (error "wrong size.")))
;; Make (SETF (VARNAMES ds) new-labels) expand into a SET-VARNAMES call.
(defsetf varNames (ds) (vN)
  `(set-varNames ,ds ,vN))

;; Same-length replacement of the two variable labels of MY-DS-2.
(setf (varNames my-ds-2) (list "a" "b"))
175 ;;; Case-name handling for Tables. Needs error checking.
176 (defun caseNames (ds)
(defun set-caseNames (ds vN)
  "Replace the case labels of DS with the list VN.

VN must have exactly as many elements as the current case labels of
DS; otherwise an error with the message \"wrong size.\" is signalled
and DS is left untouched.  Returns VN on success."
  ;; Compare the label counts before and after, so that the labels
  ;; cannot be resized independently of the stored data.
  (if (= (length (case-labels ds))
         (length vN))
      (setf (case-labels ds) vN)
      (error "wrong size.")))
;; Make (SETF (CASENAMES ds) new-labels) expand into a SET-CASENAMES call.
(defsetf caseNames (ds) (vN)
  `(set-caseNames ,ds ,vN))

;; Same-length replacement of the five case labels of MY-DS-2.
(setf (caseNames my-ds-2) (list "a" "b" "c" 4 5))