more description of approach.
[CommonLispStat.git] / data-clos.lisp
blob1b159d48cef181dc1fac2952d01dcaf3a17da76d
1 ;;; -*- mode: lisp -*-
3 ;;; File: data-clos.lisp
4 ;;; Author: AJ Rossini <blindglobe@gmail.com>
5 ;;; Copyright: (c)2008, AJ Rossini. BSD, LLGPL, or GPLv2, depending
6 ;;; on how it arrives.
7 ;;; Purpose: data package for lispstat
8 ;;; Time-stamp: <2008-03-12 17:18:42 user>
9 ;;; Creation: <2008-03-12 17:18:42 user>
11 ;;; What is this talk of 'release'? Klingons do not make software
12 ;;; 'releases'. Our software 'escapes', leaving a bloody trail of
13 ;;; designers and quality assurance people in its wake.
15 ;;; This organization and structure is new to the 21st Century
16 ;;; version.
18 ;;; data-clos.lisp
19 ;;;
20 ;;; redoing data structures in a CLOS based framework.
21 ;;;
22 ;;; No real basis for work, there is a bit of new-ness and R-ness to
23 ;;; this work. In particular, the notion of relation is key and
24 ;;; integral to the analysis. Tables are related and matched
25 ;;; vectors, for example. "column" vectors are related observations
26 ;;; (by measure/recording) while "row" vectors are related readings
27 ;;; (by case)
(in-package :cl-user)

;; Package holding the CLOS-based dataset structures for lispstat.
;; The exported names are written as uninterned symbols (#:name) so that
;; reading this form does not intern DATASET, VARNAMES and CASENAMES into
;; the current package (CL-USER), where they could later conflict with the
;; exported symbols if that package ever uses this one.  The set of
;; exported names is unchanged.
(defpackage :lisp-stat-data-clos
  (:use :common-lisp
        :clem)
  (:export #:dataset #:varNames #:caseNames))

(in-package :lisp-stat-data-clos)
39 ;; Need to figure out typed vectors. We then map a series of typed
40 ;; vectors over to tables where columns are equal typed. In a sense,
41 ;; this is a relation (1-1) of equal-typed arrays. For the most part,
42 ;; this ends up making the R data.frame into a relational building
43 ;; block (considering 1-1 mappings using row ID as a relation).
;; Placeholder class for table-shaped storage.  DEFCLASS requires a
;; slot-specifier list after the superclass list, so an explicit empty
;; list is supplied; the class declares no slots yet.
(defclass table ()
  ()
  (:documentation "Placeholder for table-shaped data storage; no slots yet."))
;; Placeholder class for relational storage.  DEFCLASS requires a
;; slot-specifier list after the superclass list, so an explicit empty
;; list is supplied; the class declares no slots yet.
(defclass relation ()
  ()
  (:documentation "Placeholder for relational data storage; no slots yet."))
;; DATASET bundles a data store with its descriptive metadata: a free-form
;; documentation string plus one list of labels for the cases and one for
;; the variables.  Every slot defaults to NIL.  Note that the accessor of
;; the STORE slot is itself called DATASET, and that its initarg is
;; :STORAGE rather than :STORE.
(defclass dataset ()
  ((store
    :accessor dataset
    :initarg :storage
    :initform nil
    :documentation "Data storage slot.  Should be an array or a relation.")
   (documentation-string
    :accessor doc-string
    :initarg :doc
    :initform nil
    :documentation "Information about dataset.")
   (case-labels
    :accessor case-labels
    :initarg :case-labels
    :initform nil
    :documentation "Labels used for describing cases (doc metadata),
possibly used for merging.")
   (var-labels
    :accessor var-labels
    :initarg :var-labels
    :initform nil
    :documentation "Variable names."))
  (:documentation "Standard Cases by Variables Dataset."))
71 ;; Need to set up dataset as a table or a relation. One way or
72 ;; another it should all work out. TODO: How do we do multiple
73 ;; inheritance or composite structures?
;; Scratch dataset for interactive experiments: a DATASET instance made
;; with no initargs, so every slot holds its NIL default.  The variable is
;; (re)bound each time this form is evaluated.
(defparameter my-ds-1 (make-instance 'dataset)
  "test ds for experiment.")

;; Evaluate at the REPL to inspect the instance.
my-ds-1
(defun consistent-dataset-p (ds)
  "Test that dataset DS is internally consistent with its metadata.

Returns true when the dimensions of the stored array are EQUAL to the
list (number-of-variable-labels number-of-case-labels).

Returns NIL, instead of signalling a type error from ARRAY-DIMENSIONS,
when the store is not an array (for example the default NIL store of a
freshly made dataset), since there are then no dimensions to compare."
  (let ((store (dataset ds)))
    (and (arrayp store)
         (equal (array-dimensions store)
                (list (length (var-labels ds))
                      (length (case-labels ds)))))))
;; Second scratch dataset: a 2x5 array of numbers with two variable
;; labels and five case labels.  The variable is (re)bound each time this
;; form is evaluated.
(defparameter my-ds-2
  (make-instance 'dataset
                 :storage #2A((1 2 3 4 5) (10 20 30 40 50))
                 :doc "This is an interesting dataset"
                 :case-labels (list "a" "b" "c" "d" "e")
                 :var-labels (list "x" "y"))
  "test ds for experiment.")
;; Scratch forms meant to be evaluated one at a time at the REPL to poke
;; at MY-DS-2.  Only the SETF forms have side effects; the rest just
;; return values for inspection.
(consistent-dataset-p my-ds-2)
my-ds-2
(make-array (list 3 5))

;; The STORE slot and the DATASET accessor reach the same object.
(slot-value my-ds-2 'store)
(dataset my-ds-2)
(eq (dataset my-ds-2) (slot-value my-ds-2 'store))

(doc-string my-ds-2)
(case-labels my-ds-2)
(var-labels my-ds-2)
;; TODO: assignments like the following need more protection, so that
;; the labels cannot silently stop matching the stored data.  An
;; inconsistent dataset would lead to wrong results.
(setf (var-labels my-ds-2) (list "a" "b"))
(setf (var-labels my-ds-2) (list "a" "b" "c")) ;; Should error, but the plain slot accessor performs no size check.
(consistent-dataset-p my-ds-2) ;; false: three variable labels, but the first array dimension is 2
(setf (var-labels my-ds-2) (list "a" "b"))
(consistent-dataset-p my-ds-2) ;; true: two variable labels again
(defun print-structure-table (ds)
  "Example of what we want the printing methods to look like.  Should be
sort of like a spreadsheet if the storage is a table.

Prints the variable labels of DS via PRINT-AS-ROW, then one line per case
label on *STANDARD-OUTPUT*: the label, a space, and whatever ROW-EXTRACT
returns for the zero-based position of that label.  Returns NIL.

NOTE(review): PRINT-AS-ROW and ROW-EXTRACT are not defined in the visible
part of this file; they must be supplied elsewhere before this is called."
  (print-as-row (var-labels ds))
  ;; The original sketch used the undefined operator INCR and called PRINC
  ;; with a printf-style string plus extra arguments; an explicit index
  ;; and FORMAT express the same intent in standard Common Lisp.
  (loop for label in (case-labels ds)
        for j from 0
        do (format t "~A ~A~%" label (row-extract (dataset ds) j))))
(defun print-structure-relational (ds)
  "Example of what we want the printing methods to look like.  Should be
sort of like a graph of spreadsheets if the storage is a relational
structure.

For every element of (RELATIONS DS), prints the variable labels of DS via
PRINT-AS-ROW and then one line per case label on *STANDARD-OUTPUT*: the
label, a space, and whatever ROW-EXTRACT returns for the zero-based
position of that label.  Returns NIL.

NOTE(review): RELATIONS, PRINT-AS-ROW and ROW-EXTRACT are not defined in
the visible part of this file.  The relation itself is not consulted yet,
so every pass prints the same table."
  (dolist (k (relations ds))
    ;; K is not used yet; declare it so the compiler does not warn.
    (declare (ignorable k))
    (print-as-row (var-labels ds))
    ;; The original sketch used the undefined operator INCR and called
    ;; PRINC with a printf-style string plus extra arguments; an explicit
    ;; index and FORMAT express the same intent in standard Common Lisp.
    (loop for label in (case-labels ds)
          for j from 0
          do (format t "~A ~A~%" label (row-extract (dataset ds) j)))))
(defgeneric extract (dataform what into-form)
  (:documentation
   "Generic entry point taking a data object DATAFORM, a selector WHAT and
a target description INTO-FORM.  The meaning of each argument is left to
the methods."))
;; EXTRACT for DATASET instances: look up WHAT on DS and hand the result,
;; together with INTO-FORM, to RESHAPE.
;;
;; NOTE(review): GET resolves to CL:GET here (the package uses
;; :COMMON-LISP), which reads symbol property lists and expects a symbol
;; as its first argument -- confirm which accessor was intended for a
;; DATASET instance.  RESHAPE is not defined in the visible part of this
;; file; presumably it comes from elsewhere -- TODO confirm.
(defmethod extract ((ds dataset) what into-form)
  (let ((selection (get ds what)))
    (reshape selection into-form)))
;; Placeholder class for data formats.  DEFCLASS requires a
;; slot-specifier list after the superclass list, so an explicit empty
;; list is supplied; the class declares no slots yet.
(defclass data-format ()
  ()
  (:documentation "Placeholder for a data format description; no slots yet."))
(defun transpose (x)
  "Map the NxM two-dimensional array X to a fresh MxN array.

Element (j, i) of the result is element (i, j) of X.  The result has the
same element type as X, and X itself is not modified.  Signals a
correctable TYPE-ERROR (via CHECK-TYPE) when X is not a two-dimensional
array.

NOTE(review): this package uses :CLEM -- confirm that it does not already
provide a function of the same name."
  ;; Previously the lone string was the entire body, so the function
  ;; returned that string instead of transposing anything.
  (check-type x (array * (* *)))
  (destructuring-bind (n-rows n-cols) (array-dimensions x)
    (let ((result (make-array (list n-cols n-rows)
                              :element-type (array-element-type x))))
      (dotimes (i n-rows result)
        (dotimes (j n-cols)
          (setf (aref result j i) (aref x i j)))))))
(defun reorder-by-rank (x order &key (by-row t))
  "Placeholder, not implemented yet: ignores X, ORDER and BY-ROW and
returns a fixed two-character string."
  (declare (ignore x order by-row))
  ;; This string is the return value, exactly as in the original stub,
  ;; where a lone string in the body served as the only form.
  " .")
(defun reorder-by-permutation (x perm &key (by-row t))
  "Placeholder, not implemented yet: ignores X, PERM and BY-ROW and
returns a fixed two-character string."
  (declare (ignore x perm by-row))
  ;; This string is the return value, exactly as in the original stub,
  ;; where a lone string in the body served as the only form.
  " .")
157 ;;; Need to consider modification APIs
158 ;;; actions are:
159 ;;; - get/set row names (case names)
160 ;;; - column names (variable names)
161 ;;; - dataset values
162 ;;; - annotation/metadata
163 ;;; - extract a reformed version of the dataset (no additional input).
164 ;;; - either overwriting or not, i.e. with or without copy.
165 ;;; - check consistency
166 ;;; -
168 ;;; Variable-name handling for Tables. Needs error checking.
(defun varNames (ds)
  "Return the variable labels of DS, as stored in its VAR-LABELS slot."
  (var-labels ds))
(defun set-varNames (ds vN)
  "Replace the variable labels of DS with VN and return VN.
Signals an error when VN does not have the same length as the labels
currently stored in DS."
  ;; Guard first: reject a replacement of a different length.
  (unless (= (length (var-labels ds))
             (length vN))
    (error "wrong size."))
  (setf (var-labels ds) vN))

;; Short-form DEFSETF: (setf (varNames ds) new) expands into a call to
;; (set-varNames ds new).
(defsetf varNames set-varNames)
;; REPL check of the VARNAMES reader and its SETF form on MY-DS-2.
(varNames my-ds-2)
;; SET-VARNAMES signals an error unless the new list is as long as the
;; labels currently stored.
(setf (varNames my-ds-2) (list "a" "b"))
(varNames my-ds-2)
185 ;;; Case-name handling for Tables. Needs error checking.
(defun caseNames (ds)
  "Return the case labels of DS, as stored in its CASE-LABELS slot."
  (case-labels ds))
(defun set-caseNames (ds vN)
  "Replace the case labels of DS with VN and return VN.
Signals an error when VN does not have the same length as the labels
currently stored in DS."
  ;; Guard first: reject a replacement of a different length.
  (unless (= (length (case-labels ds))
             (length vN))
    (error "wrong size."))
  (setf (case-labels ds) vN))

;; Short-form DEFSETF: (setf (caseNames ds) new) expands into a call to
;; (set-caseNames ds new).
(defsetf caseNames set-caseNames)
;; REPL check of the CASENAMES reader and its SETF form on MY-DS-2.
(caseNames my-ds-2)
;; SET-CASENAMES signals an error unless the new list is as long as the
;; labels currently stored; the elements themselves are not checked, so
;; strings and numbers can be mixed.
(setf (caseNames my-ds-2) (list "a" "b" "c" 4 5))
(caseNames my-ds-2)