cleaning up work supporting xarray changes.
[CommonLispStat.git] / src / data / data.lisp
blobc9edda954f76129d70ed3824685296367911d9b0
1 ;;; -*- mode: lisp -*-
3 ;;; Time-stamp: <2009-08-31 17:52:26 tony>
4 ;;; Creation: <2005-08-xx 21:34:07 rossini>
5 ;;; File: data.lisp
6 ;;; Author: AJ Rossini <blindglobe@gmail.com>
7 ;;; Copyright: (c)2005--2009, AJ Rossini. GPLv2
8 ;;; Purpose: data package for lispstat
10 ;;; What is this talk of 'release'? Klingons do not make software
11 ;;; 'releases'. Our software 'escapes', leaving a bloody trail of
12 ;;; designers and quality assurance people in its wake.
14 ;;; This organization and structure is new to the 21st Century
15 ;;; version.
17 (in-package :lisp-stat-data)
19 ;;; Consider that data has 3 genotypic characteristics. The first
20 ;;; is storage form -- scalar, vector, array. The second is datarep
21 ;;; (the "computer science simplistic data" type), in particular integer,
22 ;;; real, string, symbol. The last is statistical type
23 ;;; ("usually handled by computer science approaches via metadata"),
24 ;;; augmenting datarep type with use in a statistical context,
25 ;;; i.e. that would include nominal, ordinal, integer, continuous,
26 ;;; interval (orderable subtypes). Clearly, the statistical type can
27 ;;; be inherited, likewise the numerical type as well. The form can
28 ;;; be pushed up or simplified as necessary, but this can be
29 ;;; challenging.
31 ;;; The first approach considered is for CLS to handle this as
32 ;;; lisp-only structures. When we realize an "abstract" model, the
33 ;;; data should be pushed into an appropriate form (either "en masse",
34 ;;; or "on-demand") into a linear algebra framework.
36 ;;; There is some excellent material on this by John Chambers in one
37 ;;; of his earlier books. Reference is being ignored to encourage
38 ;;; people to read them all. With all due respect to John, they've
39 ;;; lasted quite well, but need to be updated.
41 ;;; The purpose of this package is to manage data which will be
42 ;;; processed by LispStat. In particular, it will be important to
43 ;;; register variables, datasets, relational structures, and other
44 ;;; objects which could be the target for statistical modeling and
45 ;;; inference.
;; Package-level registry table; EQL is the default hash-table test and
;; is spelled out here only for clarity.
(defvar *lisp-stat-data-table*
  (make-hash-table :test 'eql)
  "Hash table for the data known to this package.
NOTE(review): the key and value conventions are not visible in this part
of the file -- confirm against the code that populates the table.")
;; Counter that accompanies the data table; it starts out at zero.
(defvar *lisp-stat-data-count*
  0
  "Number of items currently recorded.")
53 ;;; Data (storage) Types, dt-{.*}
54 ;;;
55 ;;; Data types are the representation of data from a computer-science
56 ;;; perspective, i.e. what it is that they contain, in the sense of
57 ;;; scalars, arrays, networks, but not the actual values or
58 ;;; statistical behaviour of the values. These types include
59 ;;; particular forms of compound types (i.e. dataframe is array-like,
60 ;;; but types differ, difference is row-wise, while array is a
61 ;;; compound of elements of the same type).
62 ;;;
63 ;;; This is completely subject to change, AND HAS. We use a class
64 ;;; hierarchy to generate the types, deriving from the virtual
65 ;;; dataframe-like and matrix-like classes to construct what we think
66 ;;; we might need.
69 ;;; Statistical Variable Types, sv-{.*} or statistical-variable-{.*}
70 ;;;
71 ;;; Statistical variable types work to represent the statistical
72 ;;; category represented by the variable, i.e. nominal, ordinal,
73 ;;; integral, continuous, ratio. This metadata can be used to hint at
74 ;;; appropriate analysis methods -- or perhaps more critically, to
75 ;;; define how these methods will fail in the final interpretation.
77 ;;; originally, these were considered to be types, but now, we
78 ;;; consider this in terms of abstract classes and mix-ins.
(defclass nominal-statistical-variable ()
  ((data
    ;; The initarg lets callers populate the slot through MAKE-INSTANCE
    ;; instead of having to SETF it afterwards.
    :initarg :data
    :initform nil
    :accessor data
    :type sequence
    :documentation "Sequence holding the values of the variable; NIL by default.")
   (levels
    :initarg :levels
    :initform nil
    :accessor levels
    :type sequence
    :documentation "Sequence of levels; NIL by default.
NOTE(review): presumably the distinct categories that DATA may take --
confirm against callers."))
  (:documentation
   "Statistical variable on a nominal scale.
Both slots are sequences, default to NIL, and can be supplied at creation
time with the :DATA and :LEVELS initargs or changed later through the DATA
and LEVELS accessors."))
(defclass ordinal-statistical-variable (nominal-statistical-variable)
  ((ordering
    ;; The initarg lets callers supply the ordering through MAKE-INSTANCE
    ;; instead of having to SETF it afterwards.
    :initarg :ordering
    :initform nil
    :accessor ordering
    :type sequence
    :documentation "Sequence describing the ordering; NIL by default.
NOTE(review): how the ordering relates to the inherited slots is not shown
in this file -- confirm against callers."))
  (:documentation
   "Statistical variable on an ordinal scale.
Extends NOMINAL-STATISTICAL-VARIABLE with an ORDERING slot, which can be
supplied with the :ORDERING initarg or changed through the ORDERING
accessor."))
93 ;;;;
94 ;;;; Listing and Saving Variables and Functions
95 ;;;;
(defvar *variables* '()
  "List of the symbols registered by DEF; UNDEF removes entries from it.")

(defvar *ask-on-redefine* nil
  "When non-NIL, DEF asks for confirmation before giving a new value to a
symbol that is already bound.")
(defmacro def (symbol value)
  "Syntax: (def var form)
VAR is not evaluated and must be a symbol.  Assigns the value of FORM to
VAR and adds VAR to the list *VARIABLES* of def'ed variables.  Returns VAR.
If VAR is already bound and the global variable *ASK-ON-REDEFINE*
is not nil then you are asked if you want to redefine the variable.  If
you decline, VAR keeps its current value and *VARIABLES* is left unchanged."
  `(progn
     ;; Assignment and registration belong together: when the user declines
     ;; the redefinition neither happens, so UNDEF can never unbind a
     ;; variable that DEF did not actually assign.
     (unless (and *ask-on-redefine*
                  (boundp ',symbol)
                  (not (y-or-n-p "Variable has a value. Redefine?")))
       (defparameter ,symbol ,value)
       (pushnew ',symbol *variables*))
     ;; VAR is returned whether or not the assignment took place.
     ',symbol))
(defun variables-list ()
  "Return the def'ed variables as a list of symbols.
The entries of *VARIABLES* are converted to strings, passed through
SORT-DATA, and the resulting names are interned in the current package."
  (let* ((names (mapcar #'string *variables*))
         (sorted-names (sort-data names)))
    (mapcar #'intern sorted-names)))
(defun variables ()
  "Args: ()
Returns a list of the symbols naming all def'ed variables, in the order
produced by SORT-DATA on their names, or NIL when nothing has been def'ed."
  (when *variables*
    (let ((sorted-names (sort-data (mapcar #'string *variables*))))
      ;; The sorted names are strings, so turn them back into symbols.
      (mapcar #'intern sorted-names))))
(defun savevar (vars file)
  "Args: (vars file-name-root)
VARS is a symbol or a list of symbols. FILE-NAME-ROOT is a string (or a symbol
whose print name is used) not ending in .lsp. The VARS and their current values
are written to the file FILE-NAME-ROOT.lsp in a form suitable for use with the
load command. An existing file of that name is replaced. Returns VARS as a
list; an empty list writes an empty file."
  (let ((vars (if (listp vars) vars (list vars)))
        ;; NAMESTRING does not accept symbols, so take the print name of a
        ;; symbol directly and use NAMESTRING for strings and pathnames.
        (root (if (symbolp file) (string file) (namestring file))))
    (with-open-file (f (concatenate 'string root ".lsp")
                       :direction :output
                       ;; Saving twice to the same root must not fail merely
                       ;; because the file is already there.
                       :if-exists :supersede)
      (dolist (x vars)
        (let ((v (symbol-value x)))
          ;; Objects supply their own saved form through the :SAVE message;
          ;; every other value is written quoted.
          (if (objectp v)
              (format f "(def ~s ~s)~%" x (send v :save))
              (format f "(def ~s '~s)~%" x v)))))
    vars))
141 (defun undef (v)
142 "Args: (v)
143 If V is the symbol of a defined variable the variable it is unbound and
144 removed from the list of defined variables. If V is a list of variable
145 names each is unbound and removed. Returns V."
146 (dolist (s (if (listp v) v (list v)))
147 (when (member s *variables*)
148 (setq *variables* (delete s *variables*))
149 (makunbound s)))