class for nominal / ordinal variables.
[CommonLispStat.git] / src / data / data.lisp
blob514747fa4083020fc8c9548e5c2b7d1d046ec2e5
1 ;;; -*- mode: lisp -*-
3 ;;; Time-stamp: <2009-08-26 13:59:15 tony>
4 ;;; Creation: <2005-08-xx 21:34:07 rossini>
5 ;;; File: data.lisp
6 ;;; Author: AJ Rossini <blindglobe@gmail.com>
7 ;;; Copyright: (c)2005--2009, AJ Rossini. GPLv2
8 ;;; Purpose: data package for lispstat
10 ;;; What is this talk of 'release'? Klingons do not make software
11 ;;; 'releases'. Our software 'escapes', leaving a bloody trail of
12 ;;; designers and quality assurance people in its wake.
14 ;;; This organization and structure is new to the 21st Century
15 ;;; version.
17 (in-package :lisp-stat-data)
;;; Consider that data has 3 genotypic characteristics.  The first
;;; is storage form -- scalar, vector, array.  The second would be
;;; datarep (the "computer science simplistic data" type), in
;;; particular integer, real, string, symbol.  The last would be
;;; statistical type ("usually handled by computer science approaches
;;; via metadata"), augmenting the datarep type with its use in a
;;; statistical context, i.e. that would include nominal, ordinal,
;;; integer, continuous, interval (orderable subtypes).  Clearly, the
;;; statistical type can be inherited, likewise the numerical type as
;;; well.  The form can be pushed up or simplified as necessary, but
;;; this can be challenging.
31 ;;; The first approach considered is for CLS to handle this as
32 ;;; lisp-only structures. When we realize an "abstract" model, the
33 ;;; data should be pushed into an appropriate form (either "en masse",
34 ;;; or "on-demand") into a linear algebra framework.
36 ;;; There is some excellent material on this by John Chambers in one
37 ;;; of his earlier books. Reference is being ignored to encourage
38 ;;; people to read them all. With all due respect to John, they've
39 ;;; lasted quite well, but need to be updated.
41 ;;; The purpose of this package is to manage data which will be
42 ;;; processed by LispStat. In particular, it will be important to
43 ;;; register variables, datasets, relational structures, and other
44 ;;; objects which could be the target for statistical modeling and
45 ;;; inference.
(defvar *lisp-stat-data-table* (make-hash-table :test 'eql)
  "Hash table marking up the data that could be used by LispStat.")

(defvar *lisp-stat-data-count* 0
  "Number of items currently recorded.")
;;; Data (storage) Types, dt-{.*}
;;;
;;; Data types are the representation of data from a computer-science
;;; perspective, i.e. what it is that they contain, in the sense of
;;; scalars, arrays, networks, but not the actual values or
;;; statistical behaviour of the values.  These types include
;;; particular forms of compound types (i.e. a dataframe is array-like,
;;; but its types differ, and the difference is row-wise, while an
;;; array is a compound of elements of the same type).
;;;
;;; This is completely subject to change, AND HAS.  We use a class
;;; hierarchy to generate the types.
;;; Statistical Variable Types, sv-{.*} or statistical-variable-{.*}
;;;
;;; Statistical variable types work to represent the statistical
;;; category represented by the variable, i.e. nominal, ordinal,
;;; integral, continuous, ratio.  This metadata can be used to hint at
;;; appropriate analysis methods -- or perhaps more critically, to
;;; define how these methods will fail in the final interpretation.
;;;
;;; Originally, these were considered to be types, but now we
;;; consider this in terms of abstract classes and mix-ins.
(defclass nominal-statistical-variable ()
  ((data :initarg :data           ; allows (make-instance ... :data seq)
         :initform nil
         :accessor data
         :type sequence
         :documentation "Sequence holding the values of the variable.")
   (levels :initarg :levels       ; allows (make-instance ... :levels seq)
           :initform nil
           :accessor levels
           :type sequence
           :documentation "Sequence of levels for the variable.
NOTE(review): presumably the distinct category labels that may occur
in DATA -- confirm against callers."))
  (:documentation
   "Statistical variable of nominal type.
Both slots default to NIL (the empty sequence) and can be supplied at
creation time through the :DATA and :LEVELS initargs, or set later
through the DATA and LEVELS accessors."))
(defclass ordinal-statistical-variable (nominal-statistical-variable)
  ((ordering :initarg :ordering   ; allows (make-instance ... :ordering seq)
             :initform nil
             :accessor ordering
             :type sequence
             :documentation "Sequence describing the ordering for the variable.
NOTE(review): presumably the levels listed in rank order -- confirm
against callers."))
  (:documentation
   "Statistical variable of ordinal type.
Extends NOMINAL-STATISTICAL-VARIABLE with an ORDERING slot, which
defaults to NIL and can be supplied through the :ORDERING initarg or
set later through the ORDERING accessor."))
91 ;;;;
92 ;;;; Listing and Saving Variables and Functions
93 ;;;;
(defvar *variables* ()
  "List of the symbols that have been defined through the DEF macro.")

(defvar *ask-on-redefine* ()
  "When non-NIL, DEF asks for confirmation before giving a new value to
a variable that is already bound.")
(defmacro def (symbol value)
  "Syntax: (def var form)
VAR is not evaluated and must be a symbol.  FORM is evaluated and its
value is assigned to VAR (via DEFPARAMETER); VAR is then added to the
list *VARIABLES* of def'ed variables.  Returns VAR.
If VAR is already bound and the global variable *ASK-ON-REDEFINE* is
not nil, you are asked whether to redefine it; answering no leaves the
existing value untouched."
  (let ((quoted-name `(quote ,symbol)))
    `(progn
       ;; Define unless the user was asked and declined: asking happens
       ;; only when *ASK-ON-REDEFINE* is set and the variable is bound.
       (when (or (not *ask-on-redefine*)
                 (not (boundp ,quoted-name))
                 (y-or-n-p "Variable has a value. Redefine?"))
         (defparameter ,symbol ,value))
       ;; Registration happens whether or not the value was replaced.
       (pushnew ,quoted-name *variables*)
       ,quoted-name)))
(defun variables-list ()
  "Return the def'ed variables as a freshly consed list of symbols,
sorted by name.  *VARIABLES* itself is not modified.

The symbols returned are the very symbols stored in *VARIABLES*; they
are not re-interned, so the result does not depend on the value of
*PACKAGE* at the time of the call."
  ;; Sorting the symbols themselves (keyed on their names) avoids the
  ;; string -> INTERN round trip, which would create look-alike symbols
  ;; in the current package for variables def'ed in another package.
  ;; NOTE(review): STRING-LESSP (case-insensitive) and a stable sort are
  ;; assumed to match the ordering the project's SORT-DATA helper gives
  ;; for strings -- confirm.
  (stable-sort (copy-list *variables*) #'string-lessp :key #'string))
(defun variables ()
  "Args: ()
Returns a freshly consed list of all def'ed variables (the symbols
stored in *VARIABLES*), sorted by name.  Returns NIL when nothing has
been def'ed.  *VARIABLES* itself is not modified."
  ;; The symbols are sorted directly (keyed on their names) rather than
  ;; converted to strings and INTERNed again: INTERN uses the current
  ;; *PACKAGE*, so the round trip could return different symbols from
  ;; the ones that were def'ed.  No emptiness check is needed because
  ;; sorting an empty list yields NIL.
  ;; NOTE(review): STRING-LESSP (case-insensitive) and a stable sort are
  ;; assumed to match the ordering the project's SORT-DATA helper gives
  ;; for strings -- confirm.
  (stable-sort (copy-list *variables*) #'string-lessp :key #'string))
(defun savevar (vars file)
  "Args: (vars file-name-root)
VARS is a symbol or a list of symbols.  FILE-NAME-ROOT is a string or
pathname (or a symbol whose print name is used) not ending in .lsp.
The VARS and their current values are written to the file
FILE-NAME-ROOT.lsp, one (def ...) form per variable, in a form suitable
for use with the load command.  An existing file of that name is
replaced.  Returns the list of variables that were written."
  (let ((vars (if (listp vars) vars (list vars))) ; NIL means "no variables"
        (path (concatenate 'string
                           ;; NAMESTRING does not accept symbols, so a
                           ;; symbol is converted through its name.
                           (if (symbolp file) (string file) (namestring file))
                           ".lsp")))
    (with-open-file (f path :direction :output
                            ;; without this, saving to the same root
                            ;; twice signals an error on implementations
                            ;; whose default is :ERROR
                            :if-exists :supersede)
      (dolist (x vars)
        (let ((v (symbol-value x)))
          (if (objectp v)
              ;; objects supply their own re-creation form via :SAVE
              (format f "(def ~s ~s)~%" x (send v :save))
              ;; plain data is written quoted so LOAD does not evaluate it
              (format f "(def ~s '~s)~%" x v)))))
    vars))
139 (defun undef (v)
140 "Args: (v)
141 If V is the symbol of a defined variable the variable it is unbound and
142 removed from the list of defined variables. If V is a list of variable
143 names each is unbound and removed. Returns V."
144 (dolist (s (if (listp v) v (list v)))
145 (when (member s *variables*)
146 (setq *variables* (delete s *variables*))
147 (makunbound s)))